library(readr)
library(qvalue)
library(explore)
library(maditr)
library(GEOquery)
library(Matrix)
library(ggplot2)
library(dplyr)
library(pheatmap)
library(RColorBrewer)
library(celldex)
library(biomaRt)
library(org.Hs.eg.db)
library(plotly)
library(DESeq2)
library(msigdbr)
library(ape)
library(GSVA)
library(sva)
library(readxl)
library(clusterProfiler)
library(pheatmap)
library(gplots)
library(EnhancedVolcano)
library(metaRNASeq)
library(ggbeeswarm)
library(editData)
library(tidyverse)
library(corrr)
library(ggcorrplot)
library(FactoMineR)
library(factoextra)
library(esquisse)
library(corto)
library(circlize)
library(ComplexHeatmap)
library(janitor)

1. Preparação dos dados

2. Definir padrões de figuras

####Cores

#Vaccines
ann_colors = list(
  vaccine = c(
  "BBIBP" = "#dee2e6",
  "BNT" = "#56cfe1",
  "ChAd" = "#80ffdb",
  "ChAd-BNT" = "#5e60ce",
  "ZF2001" = "#b5179e",
  "INFECTION" = "#ff4d6d",
  "INFECTION-VACCINE"= "#ffccd5"),
  dose = c(
    "NA" = "white",
    "0" = "white",
    "1" = "#56cfe1",
    "2" = "#5978d4",
    "3" = "#b5179e"),
  day = c(
    "0" = "white",
    "1" = "#caf0f8",
    "2" = "#ade8f4",
    "3" = "#90e0ef",
    "4" = "#6CD5EA",
    "5" = "#48cae4",
    "6" = "#00b4d8",
    "7" = "#0096c7",
    "10" = "#0087BF",
    "14" = "#0077b6",
    "26" = "#015BA0",
    "28" = "#023e8a",
    "51" = "#03045e"),
  type = c('IN'= "#e5e5e5", #1st Gen  vaccines
           'SU' = "#00a896",
           'VV' = "#72efdd", #3rd Gen vaccines
           'RNA' = "#56cfe1",
           'VV-RNA' = "#5e60ce", #Heterologous
           'IN-SU' = "#b5179e",
           'INFECTION'= "#da1e37"), #Infection
  regimen = c('HO' = "grey90",
              'HE' = "#8d99ae",
              "V-I-V" = "#36248f",
              "V-I" = "#D7B0EE",
              "I-V-I" = "#7400b8",
              "I-I" = "#5390d9",
              "I" = "#64dfdf"),
  infection = c("none" = "white",
                "1" = "#56cfe1",
                "2" = "#5e60ce"),
  variant = c("NA" = "white",
              "Beta" = "#72efdd",
              "Omicron" = "#5e60ce"),
  severity = c("A (9%) MI (81%) MO (0) S (4%)" = "#caf0f8",
               "A (29%) MI (57%) MO (14) S (0%)" = "#56cfe1",
               "A (0%) MI (89%) MO (11%) S (0%)" = "#72efdd",
               "A (9%) MI (47%) MO (21%) S (3%)" = "#64dfdf"
               "S" = "#5e60ce",
Error: unexpected string constant in:
"               "A (9%) MI (47%) MO (21%) S (3%)" = "#64dfdf"
               "S""

3. Seleção de estudos

# GSE189039
# GSE201530

3.1 Baixar dados

Ler arquivos e transformar em tabela

Abra a lista do estudo e analise suas informações. Ao descompactar, surgem diversos arquivos “.featureCounts.txt.gz”, que devem ser descompactados. Para descompactar a lista de arquivos, use um loop.

4. Análise de dados

4.1. Análise de expressão genica diferencial

Padronizando tabela de counts

Metadata

A tabela de counts fornecida pelo estudo é nomeada com o Subject ID + Timepoint e não com o codigo GSM. Por isso, teremos de tratar a tabela de metadados. Além disso, ela não possui a coluna de idade. É possível manipular estes dados no excel.

DESEQ2

Preparando dados

GSE201530

###### Padronize as variáveis 
countData <- gse201530_counts_ready
Error: object 'gse201530_counts_ready' not found

Executando DESEQ2

Comparando tempos e condições

Nesta comparação, são encontradas DEGs de cada vacina individual nos diferentes tempos. Para comparar uma vacina com a outra em diferentes tempos, é necessário usar os argumentos “contrast” ou “name”.

#H-BNT-I
# contrast = c("disease_time", "H.BNT.I_D1", "H_D0")
# contrast = c("disease_time", "H.BNT.I_D2", "H_D0") (NAO DEU)
# contrast = c("disease_time", "H.BNT.I_D3", "H_D0")
# contrast = c("disease_time", "H.BNT.I_D4", "H_D0")

#I-BNT-I
# contrast = c("disease_time", "I.BNT.I_D2", "H_D0")
# contrast = c("disease_time", "I.BNT.I_D5", "H_D0")

#H-I
# contrast = c("disease_time", "H.I_D1", "H_D0")

#H-I-I
# contrast = c("disease_time", "H.I.I_D2", "H_D0")
#I-I
# contrast = c("disease_time", "I.I_D1", "H_D0")
# contrast = c("disease_time", "I.I_D3", "H_D0")
# contrast = c("disease_time", "I.I_D5", "H_D0")

H_BNT_I

#H_BNT_I_D1
gse201530_H_BNT_I_D1 = results(dds, contrast = c("disease_time", "H.BNT.I_D1", "H_D0")) %>% 
  as.data.frame() %>% 
  arrange(padj) %>% 
  mutate(condition = "H-BNT-I (D1)",
         day = 1,
         vaccine = "H-BNT-I",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")
Error in h(simpleError(msg, call)) : 
  error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': disease_time should be the name of a factor in the colData of the DESeqDataSet

I_BNT_I

######### I_BNT_I

#D2
gse201530_I_BNT_I_D2 = results(dds, contrast = c("disease_time", "I.BNT.I_D2", "H_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "I-BNT-I (D2)",
         day = 2,
         vaccine = "I-BNT-I",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_I_BNT_I_D2, file = "gse201530_I_BNT_I_D2_DEGs.csv")


#D5
gse201530_I_BNT_I_D5 = results(dds, contrast = c("disease_time", "I.BNT.I_D5", "H_D0")) %>%
  as.data.frame() %>%
  filter(padj < 0.05) %>%
  arrange(padj) %>%
  mutate(condition = "I-BNT-I (D5)",
         day = 5,
         vaccine = "I-BNT-I",
         direction = ifelse(log2FoldChange >= 1, "UP",
                            ifelse(log2FoldChange <= -1, "DOWN",
                                   "NEUTRAL"))) %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_I_BNT_I_D5, file = "gse201530_H_BNT_I_D5_DEGs.csv")

gse201530_I_BNT_I = bind_rows(gse201530_I_BNT_I_D2,
                            gse201530_I_BNT_I_D5)

write.csv(gse201530_I_BNT_I, "gse201530_I_BNT_I.csv")

H-I

######### H-I

#D1
gse201530_H_I_D1 = results(dds, contrast = c("disease_time", "H.I_D1", "H_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-I (D1)",
         day = 1,
         vaccine = "H-I",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_H_I_D1, file = "gse201530_H_I_D1_DEGs-PF.csv")


#D5
gse201530_H_I_D5 = results(dds, contrast = c("disease_time", "I.BNT.I_D5", "H_D0")) %>%
  as.data.frame() %>%
  filter(padj < 0.05) %>%
  arrange(padj) %>%
  mutate(condition = "H-I (D5)",
         day = 5,
         vaccine = "H-I",
         direction = ifelse(log2FoldChange >= 1, "UP",
                            ifelse(log2FoldChange <= -1, "DOWN",
                                   "NEUTRAL"))) %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_H_I_D5, file = "gse201530_H_I_D5_DEGs-PF.csv")

gse201530_H_I = bind_rows(gse201530_I_BNT_I_D2,
                            gse201530_I_BNT_I_D5)

write.csv(gse201530_H_I, "gse201530_H_I.csv")

H-I-I


######### H-I-I
#D2
gse201530_H_I_I_D2 = results(dds, contrast = c("disease_time", "H.I.I_D2", "H_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-I-I (D2)",
         day = 2,
         vaccine = "H-I-I",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_H_I_I_D2, file = "gse201530_H_I_I_D2_DEGs-PF.csv")

I-I


######### I-I
#D1
gse201530_I_I_D1 = results(dds, contrast = c("disease_time", "I.I_D1", "H_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "I-I (D1)",
         day = 1,
         vaccine = "I-I",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_I_I_D1, file = "gse201530_I_I_D1_DEGs-PF.csv")


#D3
gse201530_I_I_D3 = results(dds, contrast = c("disease_time", "I.I_D3", "H_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "I-I (D3)",
         day = 3,
         vaccine = "I-I",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_I_I_D3, file = "gse201530_I_I_D3_DEGs-PF.csv")


#D5
gse201530_I_I_D5 = results(dds, contrast = c("disease_time", "I.I_D5", "H_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "I-I (D5)",
         day = 5,
         vaccine = "I-I",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_I_I_D5, file = "gse201530_I_I_D5_DEGs-PF.csv")

gse201530_I_I = bind_rows(gse201530_I_I_D1,
                          gse201530_I_I_D3,
                          gse201530_I_I_D5)

Unir raw datasets

########## GSE201530

gse201530_DEGs = bind_rows(gse201530_I_I,
                           gse201530_H_I_I_D2,
                           gse201530_H_I,
                           gse201530_I_BNT_I,
                           gse201530_H_BNT_I) %>% 
  mutate(study = "GSE201530")

#Salvar
write.csv(gse201530_DEGs, file = "gse201530_DEGs_27-11-23.csv")

GSE201530_metadata_conditions = gse201530_DEGs %>% 
  select(condition:vaccine, study) %>% 
  distinct()


#Anotações
ann_vaccines_lower = ann_vaccines %>% 
  clean_names()

ann_vaccines_27_11_23 = bind_rows(ann_vaccines_lower, 
                                  GSE201530_metadata_conditions)

#Salvar
write.csv(ann_vaccines_27_11_23, file = 'ann_vaccines_27_11_23.csv')

GSE189039

Comparando tempos e condições

Nesta comparação, são encontradas DEGs de cada vacina individual nos diferentes tempos. Para comparar uma vacina com a outra em diferentes tempos, é necessário usar os argumentos “contrast” ou “name”.

H_I

######### H_I

#H_I_D10
gse189039_H_I_moderate_D10 = results(dds, contrast = c("disease_vac_severity_time", "H.I_moderate_D10", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-I (D10, moderate)",
         day = 10,
         vaccine = "H-I",
         severity = "moderate",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_I_moderate_D10, file = "gse189039_H_I_moderate_D10_DEGs.csv")

#H_I_D26
gse189039_H_I_moderate_D26 = results(dds, contrast = c("disease_vac_severity_time", "H.I_moderate_D26", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-I (D26, moderate)",
         day = 26,
         severity = "moderate",
         vaccine = "H-I",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_I_moderate_D26, file = "gse189039_H_I_moderate_D26_DEGs.csv")

#H_I_D51
gse189039_H_I_moderate_D51 = results(dds, contrast = c("disease_vac_severity_time", "H.I_moderate_D51", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-I (D51, moderate)",
         day = 51,
         vaccine = "H-I",
         severity = "moderate",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_I_moderate_D51, file = "gse189039_H_I_moderate_D51_DEGs.csv")

#H_I_severe_D10
gse189039_H_I_severe_D10 = results(dds, contrast = c("disease_vac_severity_time", "H.I_severe_D10", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-I (D10, severe)",
         day = 10,
         vaccine = "H-I",
         severity = "severe",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_I_severe_D10, file = "gse189039_H_I_severe_D10_DEGs.csv")



#H_I_severe_D26
gse189039_H_I_severe_D26 = results(dds, contrast = c("disease_vac_severity_time", "H.I_severe_D26", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-I (D26, severe)",
         day = 26,
         vaccine = "H-I",
         severity = "severe",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_I_severe_D26, file = "gse189039_H_I_severe_D26_DEGs.csv")

#H_I_severe_D51
gse189039_H_I_severe_D51 = results(dds, contrast = c("disease_vac_severity_time", "H.I_severe_D51", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-I (D51, severe)",
         day = 51,
         vaccine = "H-I",
         severity = "severe",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_I_severe_D51, file = "gse189039_H_I_severe_D51_DEGs.csv")

gse189039_H_I = bind_rows(gse189039_H_I_moderate_D10,
                          gse189039_H_I_severe_D10,
                            gse189039_H_I_moderate_D26,
                          gse189039_H_I_severe_D26,
                            gse189039_H_I_moderate_D51,
                          gse189039_H_I_severe_D51)

write.csv(gse189039_H_I, "gse189039_H_I_DEGS.csv")

H_BNT_I

######### H-BNT-I

#H_BNT-I

#MILD
gse189039_H_BNT_I_mild_D10 = results(dds, contrast = c("disease_vac_severity_time", "H.BNT.I_mild_D10", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-BNT-I (D10, mild)",
         day = 10,
         vaccine = "H-BNT-I",
         severity = "mild",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_BNT_I_mild_D10, file = "gse189039_H_BNT_I_mild_D10_DEGs.csv")


gse189039_H_BNT_I_mild_D26 = results(dds, contrast = c("disease_vac_severity_time", "H.BNT.I_mild_D26", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-BNT-I (D26, mild)",
         day = 26,
         vaccine = "H-BNT-I",
         severity = "mild",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_BNT_I_mild_D26, file = "gse189039_H_BNT_I_mild_D26_DEGs.csv")


#SEVERE

gse189039_H_BNT_I_severe_D10 = results(dds, contrast = c("disease_vac_severity_time", "H.BNT.I_severe_D10", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-BNT-I (D10, severe)",
         day = 10,
         vaccine = "H-BNT-I",
         severity = "severe",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_BNT_I_severe_D10, file = "gse189039_H_BNT_I_severe_D10_DEGs.csv")


gse189039_H_BNT_I_severe_D26 = results(dds, contrast = c("disease_vac_severity_time", "H.BNT.I_severe_D26", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-BNT-I (D26, severe)",
         day = 26,
         vaccine = "H-BNT-I",
         severity = "severe",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_BNT_I_severe_D26, file = "gse189039_H_BNT_I_severe_D26_DEGs.csv")

#UNIR
gse189039_H_BNT_I = bind_rows(gse189039_H_BNT_I_mild_D10,
                              gse189039_H_BNT_I_severe_D10,
                              gse189039_H_BNT_I_mild_D26,
                              gse189039_H_BNT_I_severe_D26)
#Salvar
write.csv(gse189039_H_BNT_I, file = "gse189039_H_BNT_I_DEGS.csv")

BNT-I-BNT

######### BNT-I-BNT 

# D51
gse189039_BNT_I_BNT_severe_D51 = results(dds, contrast = c("disease_vac_severity_time",
                                                         "BNT.I.BNT_severe_D51", 
                                                         "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "BNT-I-BNT (D51, severe)",
         day = 51,
         vaccine = "BNT-I-BNT",
         severity = "severe",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_BNT_I_BNT_severe_D51, file = "gse189039_BNT_I_BNT_severe_D51_DEGs.csv")


#Aqui tive de usar o argumento name, pois contrast não estava funcionando (?)
gse189039_BNT_I_BNT_mild_D51 = results(dds, name= "disease_vac_severity_time_H_naive_vac_D0_vs_BNT.I.BNT_mild_D51") %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  mutate(log2FoldChange = ifelse(log2FoldChange > 0, -log2FoldChange, abs(log2FoldChange))) %>% 
  arrange(padj) %>% 
  mutate(condition = "BNT-I-BNT (D51, mild)",
         day = 51,
         vaccine = "BNT-I-BNT",
         severity = "mild",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes") 

#Salvar tabela
write.csv(gse189039_BNT_I_BNT_mild_D51, file = "gse189039_BNT_I_BNT_mild_D51_DEGs.csv")

#Unir
gse189039_BNT_I_BNT = bind_rows(gse189039_BNT_I_BNT_mild_D51,
                                gse189039_BNT_I_BNT_severe_D51)


write.csv(gse189039_BNT_I_BNT, file = "gse189039_BNT_I_BNT_DEGS.csv")

UNIR

all_degs_p_05_vac_infected = bind_rows(degs_allstudies_updown_p_05,
                                       gse_201530_189039) %>% 
  mutate_all(~ replace(., is.na(.), "NA")) %>% 
  mutate(regimen = case_when(
regimen == "BNT-I-BNT" ~ "V-I-V",
regimen == "H-BNT-I" ~ "V-I",
regimen == "VAC-I" ~ "V-I",
regimen == "I-VAC-I" ~ "I-V-I",
regimen == "H-V-I" ~ "V-I",
regimen == "H-I-I" ~ "I-I",
regimen == "H-I" ~ "I",
TRUE ~ as.character(regimen)))
Error: object 'degs_allstudies_updown_p_05' not found

Comparar numero de participantes

Comparar sexo

Combinar tabela de genes sequenciados (raw)

genes_raw_combined %>% 
  group_by(study) %>% 
  summarise(study, 
            n_genes = n()) %>% 
  distinct()
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0.
Please use `reframe()` instead.
When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly.`summarise()` has grouped output by 'study'. You can override using the `.groups` argument.

5. Visualização de dados

DEGs plot

5.1. Volcanoplot

Os volcanoplots das duas vacinas são os mesmos, mas espelhados. Isso significa que definir o nivel de referencia no fator de vacinas é importante para determinar as DEGs up e down. #### ASTRAZENECA

Astrazeneca


#All
volcano_res <- EnhancedVolcano(res, 
                                    lab = rownames(res),
                                    x = 'log2FoldChange',
                                    y = 'pvalue',
                                    title='Differential expression of genes - All timepoints - ChAdOx1')
# Save the plot as a PNG file
png("gse199750_volcanoplot_DEGs_allfactores_AZ.png", width = 800, height = 600)
print(volcano_res_chad)
dev.off()

#T1-T0
volcano_res_chad_1_0 <- EnhancedVolcano(res_1_0_chad, 
                                    lab = rownames(res_1_0_chad),
                                    x = 'log2FoldChange',
                                    y = 'pvalue',
                                    title='Differential expression of genes - D1 vs. D0 - ChAdOx1')
# Save the plot as a PNG file
png("gse199750_volcanoplot_DEGs_D0-D1_AZ.png", width = 800, height = 600)
print(volcano_res_chad_1_0)
dev.off()

#T2-T0
volcano_res_chad_2_0 <- EnhancedVolcano(res_time0_time2_chad, 
                                    lab = rownames(res_time0_time2_chad),
                                    x = 'log2FoldChange',
                                    y = 'pvalue',
                                    title='Differential expression of genes - D2 vs. D0 - ChAdOx1')
# Save the plot as a PNG file
png("gse199750_volcanoplot_DEGs_D0-D2_AZ.png", width = 800, height = 600)
print(volcano_res_chad_2_0)
dev.off()

#Plotar mais de um gráfico
plot_grid(volcano_res_chad, volcano_res_chad_1_0, volcano_res_chad_2_0, ncol = 3) 

5.2. Heatmap

5.3. Diagrama de Venn com Heatmap

Tabela de overlaps de DEGs com teste exato de fisher

Determinar a proporção de DEGs compartilhadas entre condições diferentes e sua significância. O código abaixo deve ter como input uma tabela longa chamada “degs_all” (coluna 1: condicoes/vacinas, coluna 2: DEGs). O output será uma tabela longa com as colunas Vacina 1, Vacina 2, shared degs, not shared vacina 1, not shared vacina 2, lista com nomes das degs, valor p de fisher (teste exato de fisher).

pHeatmap

#Salvar imagem
png(file= paste0("shared", filename, "_p<10_qvalue10_PHEATMAP_numbers.png"), 
    width=3000, 
    height=2000, 
    res=315, 
    pointsize=10)
print(pheatmap_shared)
dev.off()
null device 
          1 
#Salvar imagem
png(file= paste0("shared", filename, "_p<10_qvalue10_PHEATMAP_nonumbers.png"), 
    width=3000, 
    height=2000, 
    res=315, 
    pointsize=10)
print(pheatmap_shared_nonumbers)
dev.off()
null device 
          1 

Chord diagram

#Chord diagram
install.packages("circlize")
library(circlize)

# sharedDEGs_UPDOWN_fisher_pvalue10
# sharedDEGs_UP_fisher_p_10_pvalue10
# sharedDEGs_DOWN_fisher_p_10_pvalue10

#Input
shared_genes_df_mirror_p10 = sharedDEGs_UP_fisher_p_10_pvalue10


#All doses
circos_alldoses = shared_genes_df_mirror_p10 %>% 
  select(Cond1, Cond2, Shared) %>% 
  mutate(Shared = as.numeric(Shared)) %>%  
  merge(ann_vaccines, by.x = "Cond1", by.y="Condition", all.x=T, all.y=F) %>%
  merge(ann_vaccines, by.x = "Cond2", by.y="Condition", all.x=T, all.y=F) %>% 
  select(Cond1, Cond2, Shared,Dose.x, Dose.y) %>% 
  rename(cond1_dose = Dose.x,
         cond2_dose = Dose.y) %>% 
  filter(Shared != 0)

circos_alldoses_mirror = circos_alldoses %>% 
  mutate(Cond1A = Cond2,
         Cond2A = Cond1) %>% 
  select(Cond1A, Cond2A, Shared, cond1_dose, cond2_dose) %>% 
  rename(Cond1 = Cond1A,
         Cond2 = Cond2A) %>% 
  rbind(circos_alldoses)

write.csv(circos_alldoses_mirror, file = "Circos_DOWN_Alldoses_pvalue10_dose1.csv")


#Dose 1
circos_dose1 = shared_genes_df_mirror_p10 %>% 
  select(Cond1, Cond2, Shared) %>% 
  mutate(Shared = as.numeric(Shared)) %>%  
  merge(ann_vaccines, by.x = "Cond1", by.y="Condition", all.x=T, all.y=F) %>%
  merge(ann_vaccines, by.x = "Cond2", by.y="Condition", all.x=T, all.y=F) %>% 
  select(Cond1, Cond2, Shared,Dose.x, Dose.y) %>% 
  rename(cond1_dose = Dose.x,
         cond2_dose = Dose.y) %>% 
  filter(cond1_dose == 1,
         cond2_dose == 1)

circos_dose1_mirror = circos_dose1 %>% 
  mutate(Cond1A = Cond2,
         Cond2A = Cond1) %>% 
  select(Cond1A, Cond2A, Shared, cond1_dose, cond2_dose) %>% 
  rename(Cond1 = Cond1A,
         Cond2 = Cond2A) %>% 
  rbind(circos_dose1)

write.csv(circos_dose1_mirror, file = "Circos_DOWN_pvalue10_dose1.csv")

#Dose 2
circos_dose2 = shared_genes_df_mirror_p10 %>% 
  select(Cond1, Cond2, Shared) %>% 
  mutate(Shared = as.numeric(Shared)) %>%  
  merge(ann_vaccines, by.x = "Cond1", by.y="Condition", all.x=T, all.y=F) %>%
  merge(ann_vaccines, by.x = "Cond2", by.y="Condition", all.x=T, all.y=F) %>% 
  select(Cond1, Cond2, Shared,Dose.x, Dose.y) %>% 
  rename(cond1_dose = Dose.x,
         cond2_dose = Dose.y) %>% 
  filter(cond1_dose == 2,
         cond2_dose == 2)
circos_dose2_mirror = circos_dose2 %>% 
  mutate(Cond1A = Cond2,
         Cond2A = Cond1) %>% 
  select(Cond1A, Cond2A, Shared, cond1_dose, cond2_dose) %>% 
  rename(Cond1 = Cond1A,
         Cond2 = Cond2A) %>% 
  rbind(circos_dose2) 

write.csv(circos_dose2_mirror, file = "Circos_DOWN_pvalue10_dose2.csv")

#Dose 3
circos_dose3 = shared_genes_df_mirror_p10 %>% 
  select(Cond1, Cond2, Shared) %>% 
  mutate(Shared = as.numeric(Shared)) %>%  
  merge(ann_vaccines, by.x = "Cond1", by.y="Condition", all.x=T, all.y=F) %>%
  merge(ann_vaccines, by.x = "Cond2", by.y="Condition", all.x=T, all.y=F) %>% 
  select(Cond1, Cond2, Shared,Dose.x, Dose.y) %>% 
  rename(cond1_dose = Dose.x,
         cond2_dose = Dose.y) %>% 
  filter(cond1_dose == 3,
         cond2_dose == 3)

 circos_dose3_mirror = circos_dose3 %>% 
  mutate(Cond1A = Cond2,
         Cond2A = Cond1) %>% 
  select(Cond1A, Cond2A, Shared, cond1_dose, cond2_dose) %>% 
  rename(Cond1 = Cond1A,
         Cond2 = Cond2A) %>% 
  rbind(circos_dose3) 

write.csv(circos_dose3_mirror, file = "Circos_DOWN_pvalue10_dose3.csv")

Visualização

Dotplot

#### Identidade e -log(q)
install.packages("ggdendro")
library(ggdendro)
library(cowplot)
library(ggtree)
library(patchwork) 

#Filtrar valores com -log(p) >= 1
sharedDEGs_UP_DOWN_fisher_filtered <- sharedDEGs_UP_DOWN_fisher %>%
  mutate(`Significance -log(p)` = cut(`-log(p)`, 
                        breaks = c(-Inf, 1, 1.3, 2, Inf), 
                        labels = c("<1", "1-1.3", "1.3-2", ">2"))) %>%
  filter(Percentage_Shared_Cond1 >= 1, Percentage_Shared_Cond2 >= 1, `Significance -log(p)` != "<1")

#Salvar
write.csv(sharedDEGs_UP_DOWN_fisher_filtered, file = "sharedDEGs_UP_DOWN_fisher_filtered.csv")

#Visualizar
plot = ggplot(sharedDEGs_UP_DOWN_fisher, aes(x = Cond1, y = Cond2, color = Percentage_Shared_Cond1, size = Percentage_Shared_Cond1)) + 
  geom_point() +
  scale_size_continuous(range = c(2, 8)) +
  geom_text(aes(label = sprintf("%.f", Percentage_Shared_Cond1)), vjust = 0.5, hjust = 0.5, size = 2, color = "black")  +
  cowplot::theme_cowplot() + 
  theme(axis.line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 10),
        axis.text.y = element_text(size = 10)) +
  scale_color_gradient(low = "white", high = "#ef233c", limits = c(0, 60)) +
  ggtitle("%Identity between vaccines, Up and Down-Regulated, pvalue < 0.05" ) +
  ylab('Cond2') +
  theme(axis.ticks = element_blank()) +
  guides(color = FALSE, size = FALSE)

#Salvar
ggsave(plot, file = "sharedDEGs_UP_DOWN_fisher.png")

#Display
plot

Single Sample GSEA (ssGSEA)

ImmuneGO

library(corto)
############## Arquivos necessários

#CellMarker_ImmuneCells.csv 
#ImmuneGO_Annotated_Genes_Nested_Interesting.csv
ann_vaccines #Anotação das vacinas

############## GSE199750
gse199750_metadata_annotated    = gse199750_metadata_annotated_16_11_23 %>% 
  mutate(condition = case_when(
    condition == "BNT (V1, D6)BNT" ~ "BNT (V1, D6)",
    condition == "BNT (V1, D6)MO" ~ "BNT-MO (V1, D6)",
    TRUE ~ condition)) %>% 
  select(sample, condition, geo, day, dose, age, sex)

#Salvar
write.csv(gse199750_metadata_annotated, file = "gse199750_metadata_annotated_22-11-23.csv")

gse199750_counts_ready #matriz de expressão


############## GSE201533

gse201533_metadata_annotated = gse201533_metadata_long_annotated_14_11_23 %>% #Metadados
  select(condition:vaccine)

gse201533_counts_ready #Matriz de expressão


############## GSE206023
gse206023_metadata_ready = gse206023_counts_long_annotated_14_11_23 %>%  #Metadados
  select(condition:age) %>% 
  distinct() %>% 
  mutate(type = if_else(type=="HO", "IN", type))

write.csv(gse206023_metadata_ready, file = "gse206023_metadata_ready.csv")

#Matriz de expressão
gse206023_counts_ready = gse206023_counts_long_annotated_14_11_23 %>% 
  mutate(counts = as.numeric(counts)) %>% 
  select(genes, sample, counts) %>% 
  pivot_wider(names_from = sample, values_from = counts, values_fn = mean) %>% 
  column_to_rownames("genes")

write.csv(gse206023_counts_ready, file = "gse206023_counts_ready.rds")
############## Inputs

ssgsea_gene = gse206023_counts_ready #Criar variável
metadata = gse206023_metadata_ready
filename = "GSE206023"
go_dataset = "CellMarker"

#Extrair sample names. O ssGSEA perde o nome no resultado.
sample_names = ssgsea_gene %>% 
  as.data.frame() %>% 
  colnames() %>% 
  as.data.frame() %>% 
  rename(sample_names = '.')

#Tabela com genes de interesse (geral) convertida em listas 

genelist = CellMarker_ImmuneCells %>% #CellMarker_ImmuneCells.csv
  select(cell = cell_name, genes = marker)
genelist = split(genelist$genes, genelist$cell) 

############## Análise 

# Run ssGSEA
nesmat = ssgsea(ssgsea_gene, genelist)

#Padronizar resultado
nesmat.result = nesmat %>% 
  as.data.frame() %>% 
  t() %>% 
  as.data.frame() %>% 
  rownames_to_column("sample") %>% 
  relocate(sample, .before = everything()) %>% 
  cbind(sample_names) %>% 
  select(-sample) %>% 
  column_to_rownames("sample_names") %>% 
  t() %>% 
  as.data.frame() %>% 
  rownames_to_column("process") %>% 
  relocate(process, .before=everything()) %>% 
  pivot_longer(cols = -process, names_to = "sample", values_to = "nes") %>% 
  mutate(pvalue = z2p(nes), #Converter NES em pvalue
         qvalue = p.adjust(pvalue, method = "BH"), #ajustar para qvalue 
         logq = - log10(qvalue)) %>%  #Calcular -log(q)
  merge(metadata, by="sample", all.x=T) %>% #Unir com metadata
  mutate(study = filename) %>% 
  merge(CellMarker_ImmuneCells, by.x ="process", by.y = "cell_name", all.x=T) %>% 
  select(-"...1") %>% 
  rename(cell_name = process)

#Salvar
write.csv(nesmat.result, file = paste0(filename, sep = "_", go_dataset, "_ssgsea_result.csv"))

print(nesmat.result)

Unir resultados


############### Unir resultados
GSE199750_CellMarker_ssgsea_result = GSE199750_CellMarker_ssgsea_result %>%
  clean_names() %>% 
  select(cell_name:type, -marker) %>% 
  distinct()

GSE201533_CellMarker_ssgsea_result = GSE201533_CellMarker_ssgsea_result %>% 
  clean_names() %>% 
  select(cell_name:type, -marker) %>% 
  distinct()

GSE206023_CellMarker_ssgsea_result = GSE206023_CellMarker_ssgsea_result %>% 
  clean_names() %>% 
  select(cell_name:type, -marker) %>% 
  distinct() 
  
  
GSE206023_CellMarker_ssgsea_result = GSE206023_CellMarker_ssgsea_result %>% 
  mutate(age = as.double(age))

ssgsea_results_unified = bind_rows(GSE199750_CellMarker_ssgsea_result,
          GSE201533_CellMarker_ssgsea_result,
          GSE206023_CellMarker_ssgsea_result)

ssgsea_results_unified = ssgsea_results_unified %>% 
  select(cell_name:immune_system) %>% 
  merge(ann_vaccines, by.x = "condition", by.y = "Condition")

#Salvar
write.csv(ssgsea_results_unified, file = paste0(go_dataset, "_ssgsea_results_unified.csv"))

Visualização


# INPUT
nesmat.result = GSE199750_ssgsea_result
filename = "GSE199750"

#Filtrar

#Lista de processos

ssgsea_interesting = nesmat.result %>% 
  filter(pvalue <= 0.10) %>% 
  distinct()

#Boxplot (NES)
NES_plot = ggplot(ssgsea_interesting) +
  aes(x = condition, y = nes, fill = vaccine) +
  geom_boxplot() +
  scale_fill_hue(direction = 1) +
  theme_minimal() +
  facet_grid(vars(process), vars(vaccine), scales = "free_x") +
 labs(title = paste0(filename, "_ImmuneGO ssGSEA"), 
      fill = "vaccine") +
 theme_bw() +
 theme(plot.title = element_text(size = 20L, face = "bold", hjust = 0.5),
       axis.text.x = element_text(angle = 45, vjust = 1, hjust=1, size = 7)) 

#Salvar
ggsave(NES_plot, file= paste0(filename, "_ssGSEA_NES_qvalue010.png"), width = 10, height = 20)
ssgsea_interesting.unified = ssgsea_results_unified %>% 
  filter(!condition %in% c("BNT-MO (V1, D0)", "BNT-MO (V2, D0)")) %>% 
  mutate(day = as.factor(day),
         dose = as.factor(dose))

ssgsea_interesting.general.unified = ssgsea_interesting.unified %>% 
  filter(process %in% c("ADAPTIVE IMMUNE SYSTEM", 
                        "CELLULAR ADAPTIVE IMMUNE SYSTEM", 
                        "INNATE IMMUNE SYSTEM", 
                        "INFLAMMATION"),
         pvalue <= 0.10)

ssgsea_interesting.specific.unified = ssgsea_interesting.unified %>% 
  filter(!process %in% c("ADAPTIVE IMMUNE SYSTEM", 
                         "CELLULAR ADAPTIVE IMMUNE SYSTEM", 
                         "INNATE IMMUNE SYSTEM", 
                         "INFLAMMATION"),
         pvalue <= 0.10)


data = ssgsea_results_unified
filename.unified = "ssgsea_results_unified"
#All conditions divided by dose, time on x-axis

# Filtrar os dados antes de passar para ggplot
filtered_data <- data %>%
  group_by(cell_name, Vaccine, condition) %>%
  summarise(n = n()) %>%
  filter(n < 5) %>%
  pull(condition)


NES_plot_unified_dayx <- ggplot(data) +
  aes(x = day, y = nes, fill = Vaccine) +
  geom_boxplot(outlier.alpha = 1) +
  geom_point(data = subset(data, day %in% filtered_data),
             aes(x = day, y = nes, color = Vaccine),
             position = position_jitter(width = 0.2), alpha = 1) +
  scale_fill_manual(values = vaxcolors) +
  scale_color_manual(values = c(BBIBP = "#ffd000", 
                                ZF2001 = "#b5179e",
                                BNT = "#56cfe1",
                                ChAd = "#80ffdb" ,
                                "ChAd-BNT" = "#72efdd")) +
  labs(
    title = "ssGSEA - All studies",
    subtitle = "General ImmuneGO, by dose",
    caption = "Vaccine",
    fill = "Vaccines"
  ) + 
  theme_minimal() +
  facet_grid(vars(Type), vars(dose), scales = "free_x") +
  theme(plot.title = element_text(size = 20L, face = "bold", hjust = 0.5),
        strip.text.y = element_text(angle = 0, vjust = 1, hjust = 1, size = 5),
        strip.text.x = element_text(size = 10))

#Salvar
ggsave(NES_plot_unified_dayx, file = paste0(filename.unified, "_ssGSEA_NES_qvalue010_2.png"), width = 10, height = 10)

print(NES_plot_unified_dayx)
#All conditions divided by vaccine, time on x-axis

# Criar o gráfico
NES_plot_unified_dayx_vax <- ggplot(data) +
  aes(x = condition, y = nes, fill = dose) +
  geom_boxplot(outlier.alpha = 0.5) +
  geom_point(data = subset(data, condition %in% filtered_data),
             aes(x = condition, y = nes, color = "black"),
             position = position_jitter(width = 0.2), alpha = 1) +
  scale_fill_manual(values = dose_colors) +
  scale_color_manual(values = dose_colors) +
  labs(
    title = "ssGSEA - All studies",
    subtitle = "General ImmuneGO, by dose",
    caption = "Vaccine",
    fill = "Vaccines"
  ) +
  theme_minimal() +
  facet_grid(vars(process), vars(vaccine), scales = "free") +
  theme(
    plot.title = element_text(size = 20L, face = "bold", hjust = 0.5),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 7),
    strip.text.y = element_text(angle = 0, vjust = 1, hjust = 1, size = 7)  # Rotação do texto da facet row
  )

#Salvar
ggsave(NES_plot_unified_dayx_vax, file = paste0(filename.unified, "_ssGSEA_NES_qvalue010_3.png"), width = 20, height = 20)

print(NES_plot_unified_dayx_vax)

Analise estatística

install.packages("ggstatsplot")
library(ggstatsplot)

ssgsea_interesting = ssgsea_interesting %>% 
  filter(process == "INNATE IMMUNE SYSTEM") %>% 
  distinct()

process = "INNATE IMMUNE SYSTEM"


ggbetweenstats(
  data  = ssgsea_interesting,
  x     = condition,
  y     = nes,
  title =  paste0("ImmuneGO ssGSEA ", filename, process)
)

#Agrupado
grouped_ggbetweenstats(
  data             = ssgsea_interesting,
  x                = condition,
  y                = nes,
  grouping.var     = process,
  ggsignif.args    = list(textsize = 4, tip_length = 0.01),
  p.adjust.method  = "bonferroni",
  palette          = "default_jama",
  package          = "ggsci",
  plotgrid.args    = list(nrow = 1),
  annotation.args  = list(title = paste0("ImmuneGO ssGSEA ", filename))
)

ANOVA

esquisser(test)

library(ggsignif)
library(rstatix)

df = test
df$Condition <- as.factor(df$Condition)

stat.test <- df %>%
  t_test(Condition ~ NES) %>%
  add_significance()


test %>%
  filter(Process %in% "INNATE IMMUNE RESPONSE") %>%
  ggboxplot(x = "Process", y = "NES",
          color = "Condition", palette = "jco") +
  facet_wrap(vars(Vaccine)) +
  stat_compare_means(method = "anova", label.y = 4) +      # Add global p-value
  stat_compare_means(label = "p.signif", method = "t.test",
                     ref.group = ".all.")    # Comparação múltipla usando Tukey HSD


# Box plots with p-values
bxp <- ggboxplot(df, x = "Process", y = "NES", fill = "#00AFBB")
stat.test <- stat.test %>% add_xy_position(x = "supp")
bxp + 
  stat_pvalue_manual(stat.test, label = "p") +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.1)))

PCA

https://www.datacamp.com/tutorial/pca-analysis-r https://rpkgs.datanovia.com/factoextra/index.html http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/114-mca-multiple-correspondence-analysis-in-r-essentials/ https://statisticsglobe.com/pca-before-k-means-clustering-r>

Bibliotecas

library(factoextra)
library(caret)
library(stats)
library(ggfortify)
library(ggplot2)

By GO term


ssgsea_results_unified = ssgsea_results_unified %>% distinct() 
  
ssgsea_results_unified.innate = ssgsea_results_unified %>% 
  filter(process %in% c("INNATE IMMUNE SYSTEM", "ANTIVIRAL AND INTERFERON", "ARPP", "INFLAMMATION", "DENDRITIC CELLS", "MACROPHAGES"))

ssgsea_results_unified.adaptive = ssgsea_results_unified %>% 
  filter(!process %in% c("INNATE IMMUNE SYSTEM", "ANTIVIRAL AND INTERFERON", "ARPP", "INFLAMMATION", "DENDRITIC CELLS", "MACROPHAGES")) %>% 
  filter(!study == "GSE201533")
  
data_ImmuneGO = ssgsea_results_unified.adaptive 
filename = "ssgsea_results_unified.adaptive"

pca_ann = data_ImmuneGO %>% 
  select(sample, condition, vaccine, geo:sex)

#Converter de long para wide
matrix_genes = data_ImmuneGO %>% 
  select(sample, process, nes)

matrix_genes <- dcast(matrix_genes, `sample` ~ `process`, 
                     value.var = "nes", 
                     fun.aggregate = mean)

matrix_genes <- replace(matrix_genes, matrix_genes == "NaN", 0) 

#Converter coluna 1 em rownames e excluir
matrix_data_pca = matrix_genes %>% 
  as.data.frame()

matrix_data_pca_ready = matrix_data_pca %>%  
  column_to_rownames("sample")

ann_vaccines_pca_matrix = matrix_data_pca %>% 
  merge(pca_ann, by.y = "sample", by.x = "sample", all.x = T, all.y = F) %>% 
  distinct()

ann_vaccines_pca_matrix_ready = ann_vaccines_pca_matrix %>% 
  select(sample, condition, vaccine, day, dose) %>% 
  mutate(dose = as.factor(dose),
         day = as.factor(day))

# Verifique quais colunas têm variância muito baixa
nearZeroVarCols <- nearZeroVar(matrix_data_pca_ready, saveMetrics = TRUE)
matrix_data_pca_ready <- matrix_data_pca_ready[, !nearZeroVarCols$nzv]
pca_res <- prcomp(matrix_data_pca_ready, scale. = TRUE)

# Crie o gráfico de PCA

###### Condition
pca_plot_condition = autoplot(pca_res, 
                    data = ann_vaccines_pca_matrix_ready, 
                    colour = 'condition', 
                    label = 0) + 
  labs(title=paste0(filename, "_bycondition")) + #scale fill manual
  scale_fill_continuous(type = "viridis") +
  theme_minimal()


###### Vaccine
pca_plot_vac = autoplot(pca_res, 
                    data = ann_vaccines_pca_matrix_ready, 
                    colour = 'vaccine', 
                    label = 0) + 
  labs(title=paste0(filename, "_byvaccine")) + #scale fill manual
  scale_color_manual(
    values = c(
  "BBIBP" = "#ffd000",
  "BNT" = "#56cfe1",
  "ChAd" = "#80ffdb",
  "ChAd-BNT" = "#0077b6",
  "ZF2001" = "#b5179e")) +
  theme_minimal()

###### Day
pca_plot_day = autoplot(pca_res, 
                    data = ann_vaccines_pca_matrix_ready, 
                    colour = 'day', 
                    label = 0) + 
  labs(title=paste0(filename, "_day")) + #scale fill manual
   scale_color_manual(
     values = c(
       "0" = "grey90",
    "1" = "#caf0f8",
    "3" = "#90e0ef",
    "6" = "#00b4d8",
    "7" = "#0077b6",
    "14" = "#023e8a",
    "28" = "#03045e")) +
  theme_minimal()

###### Dose

pca_plot_dose = autoplot(pca_res, 
                    data = ann_vaccines_pca_matrix_ready, 
                    colour = 'dose', 
                    label = 0) + 
  labs(title=paste0(filename, "_dose")) +
  scale_color_manual(
    values = c("0" = "#caf0f8", 
               "1" = "#56cfe1", 
               "2" = "#5978d4",
               "3" = "#b5179e"))+
  theme_minimal()

# Exiba o gráfico
print(pca_plot_condition)
print(pca_plot_day)
print(pca_plot_dose)
print(pca_plot_vac)

ggsave(pca_plot_condition, filename = paste0(filename, "CONDITION_PCA_Immune_GO_GSEA_not-ann.png"), width = 10, height = 8)
ggsave(pca_plot_vac, filename = paste0(filename, "_VACCINE_PCA_Immune_GO_GSEA_not-ann.png"), width = 10, height = 8)
ggsave(pca_plot_day, filename = paste0(filename, "_DAY_PCA_Immune_GO_GSEA_not-ann.png"), width = 10, height = 8)
ggsave(pca_plot_dose, filename = paste0(filename, "_DOSE_PCA_Immune_GO_GSEA_not-ann.png"), width = 10, height = 8)
############### KNN classification

#Determinar o número de clusters para KNN
pca_scores <- data.frame(pca_res$x[, 1:2])
fviz_nbclust(pca_scores,  
                     FUNcluster = kmeans,
                     method = "wss")

set.seed(123)                             # Set seed for randomization
kmeans_clust <- kmeans(pca_scores,        # Perform k-means clustering
                        centers = 3) # Definir numero de clusters

#Visualizar clusters
ggp2 <- fviz_pca_ind(pca_res,
                   habillage = kmeans_clust$cluster,
                   repel = TRUE,
                   addEllipses = TRUE,
                   ellipse.type = "t",
                   label = "none",
                   labelsize = 0) +
  guides(color = guide_legend(override.aes = list(label = ""))) +
  ggtitle(paste0(filename,  "_KNN_Clustered.png")) +
  scale_color_brewer(palette="Set1")

print(ggp2)

ggsave(ggp2, file = paste0(filename, "_KNN_Clustered.png"), width = 5, height = 4)


# Clusterizar por grupo

#Vaccine
pca_group_vac <- fviz_pca_ind(pca_res,
                   habillage = ann_vaccines_pca_matrix_ready$vaccine,
                   repel = TRUE,
                   addEllipses = TRUE,
                   ellipse.type = "t",
                   label = "none",
                   labelsize = 0) +
  guides(color = guide_legend(override.aes = list(label = ""))) +
  ggtitle(paste0(filename,  "_Vac_KNN_Clustered.png")) +
  scale_color_brewer(palette="Set1")

#Dose
pca_group_dose <- fviz_pca_ind(pca_res,
                   habillage = ann_vaccines_pca_matrix_ready$dose,
                   repel = TRUE,
                   addEllipses = TRUE,
                   ellipse.type = "t",
                   label = "none",
                   labelsize = 0) +
  guides(color = guide_legend(override.aes = list(label = ""))) +
  ggtitle(paste0(filename,  "_Dose_KNN_Clustered.png")) +
  scale_color_brewer(palette="Set1")

#Day
pca_group_day <- fviz_pca_ind(pca_res,
                   habillage = ann_vaccines_pca_matrix_ready$day,
                   repel = TRUE,
                   addEllipses = TRUE,
                   ellipse.type = "t",
                   label = "none",
                   labelsize = 0) +
  guides(color = guide_legend(override.aes = list(label = ""))) +
  ggtitle(paste0(filename,  "_Day_KNN_Clustered.png")) +
  scale_color_brewer(palette="Set1")


#Salvar
ggsave(pca_group_day, file = paste0(filename, "_Day_Clustered.png"), width = 5, height = 4)
ggsave(pca_group_dose, file = paste0(filename, "_Dose_Clustered.png"), width = 5, height = 4)
ggsave(pca_group_vac, file = paste0(filename, "_Vac_Clustered.png"), width = 5, height = 4)
#Biplot
biplot = fviz_pca_biplot(pca_res,              # Visualize clusters in biplot
                      col.var = "black",
                      alpha.var = 0.5,
                      habillage = kmeans_clust$cluster,
                      repel = TRUE,
                      addEllipses = TRUE,
                      ellipse.type = "convex",
                      labelsize = 3,
                      label = "var",
                      palette = "Set1")


ggsave(biplot, file = paste0(filename, "_BIPLOT_KNN_Clustered.png"), width = 10, height = 8)
########Correlation plot
corr_matrix = cor(matrix_data_pca_ready) 

#Plot
corrplot = ggcorrplot(corr_matrix, hc.order = TRUE) + 
  theme(axis.text.x = element_text(angle = 90, size = 5), 
        axis.text.y = element_text(size = 5))
#Salvar
ggsave(corrplot, file = paste0(filename, "_corrplot.png"), 
       width = 4, #Grande 20, pequeno 10
       height= 4) #Grande 20, pequeno 10
print(corrplot)
data.pca <- princomp(corr_matrix) #PCA
summary(data.pca) #Retornar PCs


#########Scree plot
data.pca = princomp(corr_matrix) #PCA
summary(data.pca) #Retornar PCs

#########Scree plot
scree_plot = fviz_eig(data.pca, 
                      addlabels = TRUE,
                      ylim = c(0, 70)) +
  geom_col(color = "#00AFBB", fill = "#00AFBB") +
  theme_classic()

#Salvar
ggsave(scree_plot, file = paste0(filename, "_GSEA_screeplot.png"), width = 10, height = 3) 
print(scree_plot)


#Scree plot
loadings = data.frame(data.pca$loadings[, 1:3])
loadings$Genes = rownames(loadings)
loadings_1 = arrange(loadings, desc(Comp.1))
loadings_2 = arrange(loadings, desc(Comp.2))
loadings_3 = arrange(loadings, desc(Comp.3))

#Plot
loadings_1_plot = ggplot(loadings_1, aes(x = reorder(Genes, -Comp.1), y=Comp.1, fill = Comp.1)) + 
  ggtitle(paste0("Comp1-Comp2 Genes", filename)) + 
  ylab("Comp1") +
  xlab("Gene sets") +
  geom_bar(stat = "identity") + 
  theme_light() +
  theme(axis.text.x = element_text(vjust = 1, hjust = 1, angle = 45, size = 8), 
        axis.text.y = element_text(size = 5),
        plot.title = element_text(size = 15L, hjust = 0.5)) + 
  scale_fill_continuous(type = "viridis") #cores

print(loadings_1_plot)


loadings_2_plot = ggplot(loadings_2, aes(x = reorder(Genes, -Comp.2), y=Comp.2, fill = Comp.2)) + 
  ggtitle(paste0("Comp2-Comp3 Genes_", filename)) + 
  ylab("Comp2") +
  xlab("Gene sets") +
  geom_bar(stat = "identity") + 
  theme_light() +
  theme(axis.text.x = element_text(vjust = 1, hjust = 1, angle = 45, size = 8), 
        axis.text.y = element_text(size = 5),
        plot.title = element_text(size = 15L, hjust = 0.5)) + 
  scale_fill_continuous(type = "viridis") #cores

print(loadings_2_plot)


#Salvar
ggsave(loadings_1_plot, 
       file = paste0(filename, "_GSEA_genescomp1-2.png"), 
       width = 10, height = 5)

#Salvar
ggsave(loadings_2_plot, 
       file = paste0(filename, "_GSEA_genescomp2-3.png"), 
       width = 10, height = 5)


# Graph of the variables
fviz_pca_var_genes = fviz_pca_var(data.pca, 
                                  col.ind = "cos2",
                                  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"))
fviz_pca_var_genes

ggsave(fviz_pca_var_genes, file = paste0(filename, "_fviz_pca_var_GSEA.png"), width = 10)

cos2.1 = fviz_cos2(data.pca, choice = "var", axes = 1:2)
cos2.2 = fviz_cos2(data.pca, choice = "var", axes = 2:3)

ggsave(cos2.1, file = paste0(filename, "_cos2_GSEA_1.png"), width = 10)
ggsave(cos2.2, file = paste0(filename, "_cos2_GSEA_2.png"), width = 10)

By genes

Processar dados

# Immune_GO_general_innate
# Immune_GO_adaptive

#INPUT
data_genes = ssgsea_results_unified
filename = "ssgsea_results_unified"

PCA

#Converter de long para wide
matrix_genes = data_genes %>% 
  select(study, genes, l2fc) %>%  
  dcast(`study` ~ `genes`, 
        value.var = "l2fc", 
        fun.aggregate = mean) %>% 
  as.data.frame() %>% 
  column_to_rownames(var = "study") %>% 
  replace(is.na(.), 0)

matrix_data_pca = matrix_genes %>% 
  rownames_to_column(var = "Condition")

matrix_data_pca_ready = matrix_data_pca %>% 
  column_to_rownames(var = "Condition")

ann_vaccines_pca_matrix = ann_vaccines_pca %>% 
  merge(matrix_data_pca, 
        by.x = "Condition", 
        all.x = F, 
        all.y = F) %>% 
  select(Condition:Efficacy)

# Verifique quais colunas têm variância muito baixa
nearZeroVarCols <- nearZeroVar(matrix_data_pca_ready, saveMetrics = TRUE)
matrix_data_pca_ready <- matrix_data_pca_ready[, !nearZeroVarCols$nzv]
pca_res <- prcomp(matrix_data_pca_ready, scale. = TRUE)

# Crie o gráfico de PCA
pca_plot_ann = autoplot(pca_res, 
                    data = ann_vaccines_pca_matrix, 
                    colour = 'Vaccine', 
                    label = 1) + #1: display, 0: hide
  labs(title=filename) +
  theme_classic()
  # scale_color_manual(values = c(BBIBP = "#black", 
  #                               ZF2001 = "black",
  #                               BNT = "#56cfe1",
  #                               ChAd = "#80ffdb" ,
  #                               "ChAd-BNT" = "#72efdd")) 

pca_plot_not_ann = autoplot(pca_res, 
                    data = ann_vaccines_pca_matrix, 
                    colour = 'Vaccine', 
                    label = 0) + #1: display, 0: hide
  labs(title=filename) +
  theme_classic() +
  stat_ellipse(type = "t")

#Salvar
ggsave(pca_plot_not_ann, filename = paste0(filename, "_PCA_not-ann.png"), width = 10, height = 8)
ggsave(pca_plot_ann, filename = paste0(filename, "_PCA_ann.png"), width = 10, height = 8)

# Exiba o gráfico
print(pca_plot_ann)
print(pca_plot_not_ann)

############### KNN classification

#Determinar o número de clusters para KNN
pca_scores <- data.frame(pca_res$x[, 1:2])
ggp1 <- fviz_nbclust(pca_scores,  
                     FUNcluster = kmeans,
                     method = "wss")

ggp1 

set.seed(123)                             # Set seed for randomization
kmeans_clust <- kmeans(pca_scores,        # Perform k-means clustering
                        centers = 4) # Definir numero de clusters

#Visualizar clusters
ggp2 <- fviz_pca_ind(pca_res,
                   habillage = kmeans_clust$cluster,
                   repel = TRUE,
                   addEllipses = TRUE,
                   ellipse.type = "convex",
                   labelsize = 2) +
     guides(color = guide_legend(override.aes = list(label = ""))) +
  ggtitle(paste0(filename,  "KNN_Clustered.png"))

print(ggp2)
ggsave(ggp2, file = paste0(filename, "KNN_Clustered.png"), width = 5, height = 4)
########Correlation plot
#Matriz
corr_matrix = cor(matrix_data_pca_ready) 

#Plot
corrplot = ggcorrplot(corr_matrix, hc.order = TRUE) + 
  theme(axis.text.x = element_text(angle = 90, size = 5), 
        axis.text.y = element_text(size = 5))
#Salvar
ggsave(corrplot, file = paste0(filename, "_corrplot.png"), 
       width = 20, #Grande 20, pequeno 10
       height= 20) #Grande 20, pequeno 10
print(corrplot)

#########Scree plot
data.pca = princomp(corr_matrix) #PCA
summary(data.pca) #Retornar PCs

#########Scree plot
scree_plot = fviz_eig(data.pca, 
                      addlabels = TRUE,
                      ylim = c(0, 70)) +
  geom_col(color = "#00AFBB", fill = "#00AFBB") +
  theme_classic()

#Salvar
ggsave(scree_plot, file = paste0(filename, "_screeplot.png"), width = 10, height = 3) 
print(scree_plot)

#Comp1-Comp2
loadings = data.frame(data.pca$loadings[, 1:3])
loadings$Genes = rownames(loadings)
loadings_1 = arrange(loadings, desc(Comp.1))
loadings_2 = arrange(loadings, desc(Comp.2))
loadings_3 = arrange(loadings, desc(Comp.3))

#Plot
loadings_1_plot = ggplot(loadings_1, aes(x = reorder(Genes, -Comp.1), y=Comp.1, fill = Comp.1)) + 
  ggtitle(paste0("Comp1-Comp2 Genes", filename)) + 
  ylab("Comp1") +
  xlab("Genes") +
  geom_bar(stat = "identity") + 
  theme_light() +
  theme(axis.text.x = element_text(vjust = 0.1, hjust = 1, angle = 90, size = 3), 
        axis.text.y = element_text(size = 5),
        plot.title = element_text(size = 15L, hjust = 0.5)) + 
  scale_fill_continuous(type = "viridis") #cores
  
#Salvar
ggsave(loadings_1_plot, 
       file = paste0(filename, "_genescomp1-2.png"), 
       width = 10, height = 5)

print(loadings_1_plot)
# Graph of the variables
fviz_pca_var_genes = fviz_pca_var(data.pca, 
                                  col.ind = "cos2",
                                  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"))

#Salvar
ggsave(fviz_pca_var_genes, file = paste0(filename, "_fviz_pca_var_genes.png"), width = 10)

######### COS2
cos2 = fviz_cos2(data.pca, 
                 choice = "var", 
                 axes = 1:2) + 
  theme(axis.text.x = element_text(angle=45, 
                                   size = 3)) +
  theme_light()

#Salvar
ggsave(cos2, file = paste0(filename, "_cos2.png"), width = 10, height = 5)

#Colorido
fviz_pca_var(data.pca, col.var = "cos2",
            gradient.cols = c("black", "orange", "green"),
            repel = TRUE)

#Print
print(fviz_pca_var_genes)
print(cos2)

CellMarker

#GSEA

Manual

######### INPUT AQUI ######### 
# all_degs_p_05_vac_infected = all_degs_p_05_vac_infected_27_11_23 %>%
#   mutate(condition = case_when(condition == "H-BNT-I (D1)" ~ "BNT-I (D1)",
#                                condition == "H-BNT-I (D3)" ~ "BNT-I (D3)",
#                                condition == "H-BNT-I (D4)" ~ "BNT-I (D4)",
#                                condition == "H-I-I (D2)" ~ "I-I (D2)",
#                                condition == "I-BNT-I (D2)" ~ "I-BNT-I (D2)",
#                                condition == "I-BNT-I (D5)" ~ "I-BNT-I (D5)",
#                                condition == "H-BNT-I (D10, mild)" ~ "BNT-I (D10, mild)",
#                                condition == "H-BNT-I (D10, severe)" ~ "BNT-I (D10, severe)",
#                                condition == "H-BNT-I (D26, mild)" ~ "BNT-I (D26, mild)",
#                                condition == "H-BNT-I (D26, severe)" ~ "BNT-I (D26, severe)",
#                                condition == "H-I (D10, moderate)" ~ "I (D10, moderate)",
#                                condition == "H-I (D10, severe)" ~ "I (D10, severe)",
#                                condition == "H-I (D26, moderate)" ~ "I (D26, moderate)",
#                                condition == "H-I (D26, severe)" ~ "I (D26, severe)",
#                                condition == "H-I (D51, moderate)" ~ "I (D51, moderate)",
#                                condition == "H-I (D51, severe)" ~ "I (D51, severe)",
#                                TRUE ~ condition))
# 
# write.csv(all_degs_p_05_vac_infected_27_11_23, file ="all_degs_p_05_vac_infected_29_11_23.csv")

all_degs_p_05_vac_infected

#GSE199750
# degs <- filter(all_degs_p_05_vac_infected, condition == "BNT (V1, D6)") #Não enriquecido
# degs <- filter(all_degs_p_05_vac_infected, condition == "BNT (V2, D1)") #Enriquecido
# degs <- filter(all_degs_p_05_vac_infected, condition == "BNT (V3, D1)") #Enriquecido
# degs <- filter(all_degs_p_05_vac_infected, condition == "BNT-MO (V3, D1)") #Enriquecido
# degs <- filter(all_degs_p_05_vac_infected, condition == "ChAd (V1, D6)") #Enriquecido
# degs <- filter(all_degs_p_05_vac_infected, condition == "ChAd (V2, D1)") #Enriquecido
# degs <- filter(all_degs_p_05_vac_infected, condition == "ChAd-BNT (V3, D1)") #Não enriquecido

#GSE201533
# degs = filter(all_degs_p_05_vac_infected, condition == "ChAd (V1, D3)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ChAd (V1, D7)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ChAd (V2, D3)") #Não enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ChAd (V2, D7)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ChAd-BNT (V2, D3)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ChAd-BNT (V2, D7)") #Enriquecido

#GSE206023
# degs = filter(all_degs_p_05_vac_infected, condition == "BBIBP (V3, D07)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "BBIBP (V3, D14)") #Não enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "BBIBP (V3, D28)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ZF2001 (V3, D07)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ZF2001 (V3, D14)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ZF2001 (V3, D28)") #Enriquecido

#GSE201530
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D1)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D3)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D4)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I-I (D2)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I-BNT-I (D2)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I-BNT-I (D5)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I-I (D1)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I-I (D3)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I-I (D5)")


#GSE189039
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I-BNT (D51, mild)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I-BNT (D51, severe)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D10, mild)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D10, severe)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D26, mild)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D26, severe)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I (D10, moderate)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I (D10, severe)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I (D26, moderate)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I (D26, severe)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I (D51, moderate)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I (D51, severe)")
#Definir nome para arquivos gerados
file_name = degs %>% 
  select(condition) %>% 
  distinct() %>% 
  as.character()

######### DEGs para ORA ######### 
gene_list_ora <- degs$genes
######### DEGs para GSEA ######### 
degs_gene_list_lf2c <- degs$log2fold_change
names(degs_gene_list_lf2c) <- degs$genes
degs_gene_list_lf2c<-na.omit(original_gene_list)
degs_gene_list_lf2c = sort(degs_gene_list_lf2c, decreasing = TRUE)

############ IMMUNE GO
Immune_GO_T2G = ImmuneGO_Annotated_Genes_Nested_Interesting %>% 
  select(gene_set_short, genes)

immune_GO_gsea <- GSEA(degs_gene_list_lf2c, 
                       TERM2GENE = Immune_GO_T2G, 
                       minGSSize = 2,
                       maxGSSize = 1000,
                       pvalueCutoff = 0.25,
                       pAdjustMethod = "BH") %>% 
  as.data.frame() %>% 
  arrange(qvalue) %>% 
  mutate(condition = file_name,
         gsea_enrichment = "ImmuneGO")
preparing geneSet collections...
GSEA analysis...
leading edge analysis...
done...
#Salvar tabela
write.csv(immune_GO_gsea, file = paste(file_name, "ImmuneGO_GSEA.csv", sep = "_"))

########### CELL MARKER

CellMarker_genes = CellMarker_ImmuneCells %>% 
  select(cell_name, marker)

cell_types_gsea_immune <- GSEA(degs_gene_list_lf2c, 
                       TERM2GENE = CellMarker_genes, 
                       minGSSize = 1,
                       maxGSSize = 1000,
                       pvalueCutoff = 0.25, 
                       pAdjustMethod = "BH") %>% 
  as.data.frame() %>% 
  arrange(qvalue) %>% 
  mutate(condition = file_name,
         gsea_enrichment = "CellMarker")
preparing geneSet collections...
GSEA analysis...
leading edge analysis...
done...
#Salvar tabela
write.csv(cell_types_gsea_immune, file=paste(file_name, "CellMarker_GSEA.csv", sep = "_"))

########### VAX SIG DB

vax_sig_genes = VAX_Genes_Annotated_RAW %>% 
  select(STANDARD_NAME, GENE)

ora_degs_vax <- enricher(gene_list_ora, 
                       TERM2GENE = vax_sig_genes, 
                       minGSSize = 1,
                       maxGSSize = 1000,
                       pvalueCutoff = 0.25, 
                       pAdjustMethod = "BH")%>% 
  as.data.frame() %>% 
  arrange(qvalue) %>% 
  mutate(condition = file_name,
         gsea_enrichment = "VAXSig")

#Salvar 
write.csv(ora_degs_vax, file=paste(file_name, "VAXSig_ORA.csv", sep = "_"))

Automatizado

---
title: "Naive and infected patients, vaccinated"
output: html_notebook
---

```{r}
library(readr)
library(qvalue)
library(explore)
library(maditr)
library(GEOquery)
library(Matrix)
library(ggplot2)
library(dplyr)
library(pheatmap)
library(RColorBrewer)
library(celldex)
library(biomaRt)
library(org.Hs.eg.db)
library(plotly)
library(DESeq2)
library(msigdbr)
library(ape)
library(GSVA)
library(sva)
library(readxl)
library(clusterProfiler)
library(pheatmap)
library(gplots)
library(EnhancedVolcano)
library(metaRNASeq)
library(ggbeeswarm)
library(editData)
library(tidyverse)
library(corrr)
library(ggcorrplot)
library(FactoMineR)
library(factoextra)
library(esquisse)
library(corto)
library(circlize)
library(ComplexHeatmap)
library(janitor)
```


# 1. Preparação dos dados

## 2. Definir padrões de figuras

####Cores
```{r}
# Definir variáveis de cores
vaxcolors = c(
  "BBIBP" = "#ffd000",
  "BNT" = "#56cfe1",
  "ChAd" = "#80ffdb",
  "ChAd-BNT" = "#72efdd",
  "ZF2001" = "#b5179e")

dose_colors = c(
    "1" = "#56cfe1",
    "2" = "#5978d4",
    "3" = "#b5179e")

day_colors = c(
    "1" = "#caf0f8",
    "3" = "#90e0ef",
    "6" = "#00b4d8",
    "7" = "#0077b6",
    "14" = "#023e8a",
    "28" = "#03045e")

type_colors = c('IN'= "#ffd000", #First generation vaccines
           'LA' = "#ffff3f",
           'CONJ' = "#f4a261",
           'VLP' = "#da1e37", #Second generation vaccines
           'SU' = "#ff4d6d",
           'PEP' = "#f48498",
           'VV' = "#72efdd", #Third generation vaccines
           'RNA' = "#56cfe1",
           'VV-RNA' = "#5e60ce",
           'IN-SU' = "#b5179e")

regimen_colors = c('HO' = "#56cfe1",
              'HE' = "#5e60ce")

immune_system_colors = c(
    "Innate" = "#faedcd",
    "Adaptive" = "#ffb5a7",
    "General" = "#99d98c",
    "Other" = "#e5e5e5",
    "Complement" = "#7400b8")

immune_subsystem_colors = c(
    "Adaptive" = "#ffb5a7",
    "Cellular" = "#ff758f",
    "Humoral" = "#ff758f",
    "Antibacterial" = "#4895ef",
    "Antifungal" = "#4361ee",
    "Antimicrobial" = "#7400b8",
    "General" = "#99d98c",
    "Antigen presentation" = "#e4c4a5",
    "APC" = "#ccd5ae",
    "Eosinophil" = "#faedcd",
    "Granulocyte" = "#faedcd",
    "Leukocyte" = "#ffb5a7",
    "Neutrophil" = "#ccd5ae",
    "Innate immune response" = "#faedcd",
    "Interferon" = "#a4c3b2",
    "Antiviral" = "#60495a",
    "Pattern recognition receptor" = "#f4f4d5",
    "Natural killer" = "#fedd96",
    "Mucosa" = "#c3dfe0",
    "Erythrocyte" = "#4895ef")



########## Colors dictionary

#Vaccines
ann_colors = list(
  vaccine = c(
  "BBIBP" = "#dee2e6",
  "BNT" = "#56cfe1",
  "ChAd" = "#80ffdb",
  "ChAd-BNT" = "#5e60ce",
  "ZF2001" = "#b5179e",
  "INFECTION" = "#ff4d6d",
  "INFECTION-VACCINE"= "#ffccd5"),
  dose = c(
    "NA" = "white",
    "0" = "white",
    "1" = "#56cfe1",
    "2" = "#5978d4",
    "3" = "#b5179e"),
  day = c(
    "0" = "white",
    "1" = "#caf0f8",
    "2" = "#ade8f4",
    "3" = "#90e0ef",
    "4" = "#6CD5EA",
    "5" = "#48cae4",
    "6" = "#00b4d8",
    "7" = "#0096c7",
    "10" = "#0087BF",
    "14" = "#0077b6",
    "26" = "#015BA0",
    "28" = "#023e8a",
    "51" = "#03045e"),
  type = c('IN'= "#e5e5e5", #1st Gen  vaccines
           'SU' = "#00a896",
           'VV' = "#72efdd", #3rd Gen vaccines
           'RNA' = "#56cfe1",
           'VV-RNA' = "#5e60ce", #Heterologous
           'IN-SU' = "#b5179e",
           'INFECTION'= "#da1e37"), #Infection
  regimen = c('HO' = "grey90",
              'HE' = "#8d99ae",
              "V-I-V" = "#36248f",
              "V-I" = "#D7B0EE",
              "I-V-I" = "#7400b8",
              "I-I" = "#5390d9",
              "I" = "#64dfdf"),
  infection = c("none" = "white",
                "1" = "#56cfe1",
                "2" = "#5e60ce"),
  variant = c("NA" = "white",
              "Beta" = "#72efdd",
              "Omicron" = "#5e60ce"),
  severity = c("A (9%) MI (81%) MO (0) S (4%)" = "#caf0f8",
               "A (29%) MI (57%) MO (14) S (0%)" = "#56cfe1",
               "A (0%) MI (89%) MO (11%) S (0%)" = "#72efdd",
               "A (9%) MI (47%) MO (21%) S (3%)" = "#64dfdf",
               "S" = "#5e60ce",
               "MO" = "#64dfdf",
               "MI" = "#72efdd",
               "NA" = "white"))

#Rows
# ann_genesets_heatmap
ann_colors_rows = list(
  `immune_system` = c(
    "Innate" = "#faedcd",
    "Adaptive" = "#ffb5a7",
    "General" = "#99d98c",
    "Other" = "#e5e5e5",
    "Complement" = "#7400b8",
    "No enrichment" = "gray90"
  ),
  `immune_sub_system` = c(
    "Adaptive" = "#ffb5a7",
    "Cellular" = "#ff758f",
    "Humoral" = "#ff758f",
    "Antibacterial" = "#4895ef",
    "Antifungal" = "#4361ee",
    "Antimicrobial" = "#7400b8",
    "General" = "#99d98c",
    "Antigen presentation" = "#e4c4a5",
    "APC" = "#ccd5ae",
    "Eosinophil" = "#faedcd",
    "Granulocyte" = "#faedcd",
    "Leukocyte" = "#ffb5a7",
    "Neutrophil" = "#ccd5ae",
    "Innate immune response" = "#faedcd",
    "Interferon" = "#a4c3b2",
    "Antiviral" = "#60495a",
    "Pattern recognition receptor" = "#f4f4d5",
    "Natural killer" = "#fedd96",
    "Mucosa" = "#c3dfe0",
    "Erythrocyte" = "#4895ef",
    "No enrichment" = "gray90"
  )
)
  

```



# 3. Seleção de estudos


```{r}
# GSE189039
# GSE201530
```

## 3.1 Baixar dados

```{r eval=FALSE, include=FALSE}
#Baixar dados de uma lista de estudos

############ Obter Tabela de counts
#GEO supplementary files. Baixar individualmente. Arquivos grandes devem ser baixados direto do navegador.
getGEOSuppFiles('GSE189039') # Baixar tabela de counts
untar('GSE189039_RAW.tar') # Descompactar tar zipped
#gunzip("GSE174621_RAW.tar") # Descompactar gunzip (.gz)


############ Metadados

filename = "GSE189039"
GSE189039 <- getGEO("GSE189039", GSEMatrix = TRUE) #Use GSEMatrix = TRUE para obter metadados de cada amostra
GSE189039 <- GSE189039[[1]] #Analisar o primeiro objeto da lista. 

#############Analisar informações sobre amostras. 

GSE189039_metadata = GSE189039 %>% 
  pData() %>% #Informações sobre amostras 
  select( #Selecionar e renomear colunas de interesse
    geo_sample = "geo_accession", 
    subject_id = "title", age = "age:ch1", 
    sex = "gender:ch1", disease_state = "diseas state:ch1", 
    severity = "severity:ch1") 

write.csv(GSE189039_metadata, file = "GSE189039_metadata.csv") #Terminar de anotar no excel

```


```{r eval=FALSE, include=FALSE}
############ Obter Tabela de counts
#GEO supplementary files. Baixar individualmente. Arquivos grandes devem ser baixados direto do navegador.
getGEOSuppFiles('GSE201530') # Baixar tabela de counts
untar('GSE201530_RAW.tar') # Descompactar tar zipped

############ Metadados

filename = "GSE201530"
GSE201530 <- getGEO("GSE201530", GSEMatrix = TRUE) #Use GSEMatrix = TRUE para obter metadados de cada amostra
GSE201530 <- GSE201530[[1]] #Analisar o primeiro objeto da lista. 

#############Analisar informações sobre amostras. 

GSE201530_metadata = GSE201530 %>% 
  pData() %>% #Informações sobre amostras 
  select(
    geo_sample = "geo_accession", 
    subject_id = "title", 
    age = "age:ch1", 
    sex = "gender:ch1", 
    disease_state = "disease state:ch1", 
    timepoint = "days after positive pcr results:ch1",
    group = "group (by covid-19 vaccination, prior infection):ch1",
    variant = "omicron sublineage:ch1") 

write.csv(GSE201530_metadata, file = "GSE201530_metadata.csv")

#Anotar no excel com dados do artigo
```

*Ler arquivos e transformar em tabela*

Abra a lista do estudo e analise suas informações. Ao descompactar, surgem diversos arquivos ".featureCounts.txt.gz", que devem ser descompactados. Para descompactar a lista de arquivos, use um loop.

```{r eval=FALSE, include=FALSE}
#Descompactar todos os arquivos
diretorio <- "~/Desktop/RNA seq - Wasim/GSE189039"
arquivos <- list.files(path = diretorio, pattern = "*.gz$", full.names = TRUE)
for(arquivo_gz in arquivos) {gunzip(arquivo_gz)}

#Ler arquivos do estudo específico
diretorio_GSE201530 <- "~/Desktop/RNA seq - Wasim/GSE201530"
diretorio_GSE189039 <- "~/Desktop/RNA seq - Wasim/GSE189039"

arquivos_txt_GSE201530 <- list.files(path = diretorio_GSE201530, pattern = "*.txt$", full.names = TRUE)
arquivos_txt_GSE189039 <- list.files(path = diretorio_GSE189039, pattern = "*.txt$", full.names = TRUE)

# Função para ler e ajustar os arquivos
ler_e_ajustar <- function(arquivo) {
  dados <- read.table(arquivo, header = FALSE, sep = "\t")
  if (ncol(dados) >= 2) {
    colnames(dados)[1:2] <- c("genes", tools::file_path_sans_ext(basename(arquivo)))
    return(dados)
  } else {
    return(NULL)
  }
}

# Ler e ajustar todos os arquivos na lista
lista_dados_GSE201530 <- lapply(arquivos_txt_GSE201530, ler_e_ajustar)
lista_dados_GSE189039 <- lapply(arquivos_txt_GSE189039, ler_e_ajustar)

# Filtrar elementos NULL da lista e unir os dataframes
counts_gse201530 <- lista_dados_GSE201530 %>%
  purrr::discard(is.null) %>%
  reduce(full_join, by = "genes")

counts_GSE189039 <- lista_dados_GSE189039 %>%
  purrr::discard(is.null) %>%
  reduce(full_join, by = "genes")

# Renomear as colunas exceto a primeira ("genes")
colnames(counts_gse201530)[-1] <- gsub("^[^_]+_(.*)", "\\1", colnames(counts_gse201530)[-1])
colnames(counts_GSE189039)[-1] <- gsub("^[^_]+_(.*)", "\\1", colnames(counts_GSE189039)[-1])


# Ler e ajustar todos os arquivos na lista
lista_dados_GSE201530 <- lapply(arquivos_txt_GSE201530, ler_e_ajustar)
lista_dados_GSE189039 <- lapply(arquivos_txt_GSE189039, ler_e_ajustar)

# Filtrar elementos NULL da lista e unir os dataframes
counts_gse201530 <- lista_dados_GSE201530 %>%
  purrr::discard(is.null) %>%
  reduce(full_join, by = "genes")

counts_GSE189039 <- lista_dados_GSE189039 %>%
  purrr::discard(is.null) %>%
  reduce(full_join, by = "genes")

# Renomear as colunas exceto a primeira ("genes")
colnames(counts_gse201530)[-1] <- gsub("^[^_]+_(.*)", "\\1", colnames(counts_gse201530)[-1])
colnames(counts_GSE189039)[-1] <- gsub("^[^_]+_(.*)", "\\1", colnames(counts_GSE189039)[-1])

#Salvar
write.csv(counts_gse201530, file = "gse201530_counts.csv")
write.csv(counts_GSE189039, file = "gse189039_counts.csv")

```



# 4. Análise de dados

## 4.1. Análise de expressão genica diferencial

*Padronizando tabela de counts*

```{r eval=FALSE, include=FALSE}

############### GSE201530
# Ordenar tabela
gse201530_counts_ready <- counts_gse201530 %>%
  column_to_rownames("genes") %>% 
  t() %>%
  as.data.frame() %>% 
  rownames_to_column("sample") %>% 
  arrange(sample) %>% 
  column_to_rownames("sample") %>% 
  t() %>% 
  as.data.frame() %>%
  rownames_to_column("genes") %>% 
  filter(!grepl("_", genes)) %>% 
  column_to_rownames("genes")

# Salvar arquivo com rownames
saveRDS(gse201530_counts_ready, file = "gse201530_counts_ready.rds")


############### GSE189039
# Ordenar tabela
gse189039_counts_ready <- counts_GSE189039 %>%
  column_to_rownames("genes") %>% 
  t() %>%
  as.data.frame() %>% 
  rownames_to_column("sample") %>% 
  arrange(sample) %>% 
  column_to_rownames("sample") %>% 
  t() %>% 
  as.data.frame() %>%
  rownames_to_column("genes") %>% 
  filter(!grepl("_", genes)) %>% 
  column_to_rownames("genes")

# Salvar arquivo com rownames
saveRDS(gse189039_counts_ready, file = "gse189039_counts_ready.rds")

GSE189039_metadata

```

*Metadata*

A tabela de counts fornecida pelo estudo é nomeada com o Subject ID + Timepoint e não com o codigo GSM. Por isso, teremos de tratar a tabela de metadados. Além disso, ela não possui a coluna de idade. É possível manipular estes dados no excel.


*DESEQ2*

*Preparando dados*


## GSE201530

```{r}
###DESEQ2 - PF versus AZ, PF em diferentes tempos, AZ em diferentes tempos

####Definindo design

###### Padronize as variáveis 
countData <- gse201530_counts_ready
colData <- GSE201530_metadata %>%
  mutate(
    timepoint_day = factor(timepoint_day),
    age = factor(age),
    sex = factor(sex),
    disease_vac = factor(disease_vac),
    dose = factor(dose),
    disease_time = paste0(disease_vac, "_D", timepoint_day)) %>% 
  select(subject_id, everything())


###### Crie o objeto DESeqDataSet ######
# O design deve incluir todos os fatores que podem influenciar significativamente os dados. Coloque por último o fator de interesse principal - no caso, o fator de primeira ou segunda doses, pois nesta primeira etapa não serão analisados os efeitos de uma terceira dose de vacina heteróloga.

dds <- DESeqDataSetFromMatrix(countData = countData, 
                              colData = colData, 
                              design = ~ disease_time)

###### Pré-filtragem ######
#Mantenha os genes com pelo menos 10 leituras no total.

keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]

```

*Executando DESEQ2*

```{r}
###### Executar DESeq ######

#Executar DESeq com método Wald
dds <- DESeq(dds)

#Salvar
write_rds(dds,file="gse201530_dds_DESeq2_Wald_disease_vac_time.rds")

###### Obter comparações realizadas ######
resultsNames(dds)

dds = gse201530_dds_DESeq2_Wald_disease_vac_timepoint_day

###### Obtendo os DEGs totais ######
res <- results(dds) %>% 
  as.data.frame() %>% 
  filter(pvalue < 0.05)

#Salvar tabela
write.csv(res, file = "gse201530_dds_DESeq2_Wald_disease_vac_AND_timepoint_day_ALLDEGS.rds", row.names = TRUE)

```

*Comparando tempos e condições*

Nesta comparação, são encontradas DEGs de cada vacina individual nos diferentes tempos. Para comparar uma vacina com a outra em diferentes tempos, é necessário usar os argumentos "contrast" ou "name".

```{r}
#H-BNT-I
# contrast = c("disease_time", "H.BNT.I_D1", "H_D0")
# contrast = c("disease_time", "H.BNT.I_D2", "H_D0") (NAO DEU)
# contrast = c("disease_time", "H.BNT.I_D3", "H_D0")
# contrast = c("disease_time", "H.BNT.I_D4", "H_D0")

#I-BNT-I
# contrast = c("disease_time", "I.BNT.I_D2", "H_D0")
# contrast = c("disease_time", "I.BNT.I_D5", "H_D0")

#H-I
# contrast = c("disease_time", "H.I_D1", "H_D0")

#H-I-I
# contrast = c("disease_time", "H.I.I_D2", "H_D0")
#I-I
# contrast = c("disease_time", "I.I_D1", "H_D0")
# contrast = c("disease_time", "I.I_D3", "H_D0")
# contrast = c("disease_time", "I.I_D5", "H_D0")
```

*H_BNT_I* 
```{r}
######### H_BNT_I

#H_BNT_I_D1
gse201530_H_BNT_I_D1 = results(dds, contrast = c("disease_time", "H.BNT.I_D1", "H_D0")) %>% 
  as.data.frame() %>% 
  arrange(padj) %>% 
  mutate(condition = "H-BNT-I (D1)",
         day = 1,
         vaccine = "H-BNT-I",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_H_BNT_I_D1, file = "gse201530_H_BNT_I_D1_DEGs_notfiltered.csv", row.names = TRUE)

#H_BNT_I_D3
gse201530_H_BNT_I_D3 = results(dds, contrast = c("disease_time", "H.BNT.I_D3", "H_D0")) %>%
  as.data.frame() %>%
  arrange(padj) %>%
  mutate(condition = "H-BNT-I (D3)",
         day = 3,
         vaccine = "H-BNT-I",
         direction = ifelse(log2FoldChange >= 1, "UP",
                            ifelse(log2FoldChange <= -1, "DOWN",
                                   "NEUTRAL"))) %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_H_BNT_I_D3, file = "gse201530_H_BNT_I_D3_DEGs.csv", row.names = TRUE)

#H_BNT_I_D4
gse201530_H_BNT_I_D4 = results(dds, contrast = c("disease_time", "H.BNT.I_D4", "H_D0")) %>%
  as.data.frame() %>%
  filter(padj < 0.05) %>%
  arrange(padj) %>%
  mutate(condition = "H-BNT-I (D4)",
         day = 4,
         vaccine = "H-BNT-I",
         direction = ifelse(log2FoldChange >= 1, "UP",
                            ifelse(log2FoldChange <= -1, "DOWN",
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_H_BNT_I_D4, file = "gse201530_H_BNT_I_D4_DEGs.csv", row.names = TRUE)

gse201530_H_BNT_I = bind_rows(gse201530_H_BNT_I_D1,
                            gse201530_H_BNT_I_D3,
                            gse201530_H_BNT_I_D4)

write.csv(gse201530_H_BNT_I, "gse201530_H_BNT_I.csv")

```


*I_BNT_I* 
```{r}
######### I_BNT_I

#D2
gse201530_I_BNT_I_D2 = results(dds, contrast = c("disease_time", "I.BNT.I_D2", "H_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "I-BNT-I (D2)",
         day = 2,
         vaccine = "I-BNT-I",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_I_BNT_I_D2, file = "gse201530_I_BNT_I_D2_DEGs.csv")


#D5
gse201530_I_BNT_I_D5 = results(dds, contrast = c("disease_time", "I.BNT.I_D5", "H_D0")) %>%
  as.data.frame() %>%
  filter(padj < 0.05) %>%
  arrange(padj) %>%
  mutate(condition = "I-BNT-I (D5)",
         day = 5,
         vaccine = "I-BNT-I",
         direction = ifelse(log2FoldChange >= 1, "UP",
                            ifelse(log2FoldChange <= -1, "DOWN",
                                   "NEUTRAL"))) %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_I_BNT_I_D5, file = "gse201530_H_BNT_I_D5_DEGs.csv")

gse201530_I_BNT_I = bind_rows(gse201530_I_BNT_I_D2,
                            gse201530_I_BNT_I_D5)

write.csv(gse201530_I_BNT_I, "gse201530_I_BNT_I.csv")

```


*H-I* 
```{r}
######### H-I

#D1
gse201530_H_I_D1 = results(dds, contrast = c("disease_time", "H.I_D1", "H_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-I (D1)",
         day = 1,
         vaccine = "H-I",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_H_I_D1, file = "gse201530_H_I_D1_DEGs-PF.csv")


#D5
gse201530_H_I_D5 = results(dds, contrast = c("disease_time", "I.BNT.I_D5", "H_D0")) %>%
  as.data.frame() %>%
  filter(padj < 0.05) %>%
  arrange(padj) %>%
  mutate(condition = "H-I (D5)",
         day = 5,
         vaccine = "H-I",
         direction = ifelse(log2FoldChange >= 1, "UP",
                            ifelse(log2FoldChange <= -1, "DOWN",
                                   "NEUTRAL"))) %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_H_I_D5, file = "gse201530_H_I_D5_DEGs-PF.csv")

gse201530_H_I = bind_rows(gse201530_I_BNT_I_D2,
                            gse201530_I_BNT_I_D5)

write.csv(gse201530_H_I, "gse201530_H_I.csv")

```

*H-I-I* 
```{r}

######### H-I-I
#D2
gse201530_H_I_I_D2 = results(dds, contrast = c("disease_time", "H.I.I_D2", "H_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-I-I (D2)",
         day = 2,
         vaccine = "H-I-I",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_H_I_I_D2, file = "gse201530_H_I_I_D2_DEGs-PF.csv")

```

*I-I*
```{r}

######### I-I
#D1
gse201530_I_I_D1 = results(dds, contrast = c("disease_time", "I.I_D1", "H_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "I-I (D1)",
         day = 1,
         vaccine = "I-I",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_I_I_D1, file = "gse201530_I_I_D1_DEGs-PF.csv")


#D3
gse201530_I_I_D3 = results(dds, contrast = c("disease_time", "I.I_D3", "H_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "I-I (D3)",
         day = 3,
         vaccine = "I-I",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_I_I_D3, file = "gse201530_I_I_D3_DEGs-PF.csv")


#D5
gse201530_I_I_D5 = results(dds, contrast = c("disease_time", "I.I_D5", "H_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "I-I (D5)",
         day = 5,
         vaccine = "I-I",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse201530_I_I_D5, file = "gse201530_I_I_D5_DEGs-PF.csv")

gse201530_I_I = bind_rows(gse201530_I_I_D1,
                          gse201530_I_I_D3,
                          gse201530_I_I_D5)

```

Unir raw datasets 
```{r}
########## GSE201530

gse201530_DEGs = bind_rows(gse201530_I_I,
                           gse201530_H_I_I_D2,
                           gse201530_H_I,
                           gse201530_I_BNT_I,
                           gse201530_H_BNT_I) %>% 
  mutate(study = "GSE201530")

#Salvar
write.csv(gse201530_DEGs, file = "gse201530_DEGs_27-11-23.csv")

GSE201530_metadata_conditions = gse201530_DEGs %>% 
  select(condition:vaccine, study) %>% 
  distinct()


#Anotações
ann_vaccines_lower = ann_vaccines %>% 
  clean_names()

ann_vaccines_27_11_23 = bind_rows(ann_vaccines_lower, 
                                  GSE201530_metadata_conditions)

#Salvar
write.csv(ann_vaccines_27_11_23, file = 'ann_vaccines_27_11_23.csv')


```






## GSE189039

```{r}
###DESEQ2 - PF versus AZ, PF em diferentes tempos, AZ em diferentes tempos

####Definindo design

###### Padronize as variáveis 
countData <- gse189039_counts_ready
colData <- GSE189039_metadata %>%
  mutate(disease_vac = ifelse(disease_vac == "I", "H-I",
                              ifelse(disease_vac == "BNT-I", "H-BNT-I", disease_vac)),
         disease_vac_severity_time = paste0(disease_vac, "_", severity, "_D", timepoint_day),
         disease_vac_severity_time = factor(disease_vac_severity_time)) %>% 
  select(subject_id, everything())


###### Crie o objeto DESeqDataSet ######
dds <- DESeqDataSetFromMatrix(countData = countData, 
                              colData = colData, 
                              design = ~ disease_vac_severity_time)

###### Pré-filtragem ######
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep,]

###### Executar DESeq ######

#Método Wald
dds <- DESeq(dds)

#Salvar
write_rds(dds,file="gse189039_dds_DESeq2_Wald_disease_vac_severity_time.rds")

###### Obter comparações realizadas ######
resultsNames(dds)

###### Obtendo os DEGs totais ######
res <- results(dds) %>% 
  as.data.frame() %>% 
  filter(pvalue < 0.05)

#Salvar tabela
write.csv(res, file = "gse189039_dds_DESeq2_Wald_disease_vac_severity_time_ALLDEGS.rds")

```

*Comparando tempos e condições*

Nesta comparação, são encontradas DEGs de cada vacina individual nos diferentes tempos. Para comparar uma vacina com a outra em diferentes tempos, é necessário usar os argumentos "contrast" ou "name".

```{r}

## H-I
# MODERATE
# contrast = c("disease_vac_severity_time", "H.I_moderate_D10", "H_naive_vac_D0")
# contrast = c("disease_vac_severity_time", "H.I_moderate_D26", "H_naive_vac_D0")
# contrast = c("disease_vac_severity_time", "H.I_moderate_D51", "H_naive_vac_D0")

# SEVERE
# contrast = c("disease_vac_severity_time", "H.I_severe_D10", "H_naive_vac_D0")
# contrast = c("disease_vac_severity_time", "H.I_severe_D26", "H_naive_vac_D0")
# contrast = c("disease_vac_severity_time", "H.I_severe_D51", "H_naive_vac_D0")


## H-BNT-I
# MILD
# contrast = c("disease_vac_severity_time", "H.BNT.I_mild_D10", "H_naive_vac_D0")
# contrast = c("disease_vac_severity_time", "H.BNT.I_mild_D26", "H_naive_vac_D0")

# SEVERE
# contrast = c("disease_vac_severity_time", "H.BNT.I_severe_D10", "H_naive_vac_D0")
# contrast = c("disease_vac_severity_time", "H.BNT.I_severe_D26", "H_naive_vac_D0")

## BNT-I-BNT
# contrast = c("disease_vac_severity_time", "BNT.I.BNT_severe_D51", "H_naive_vac_D0")
contrast = c("disease_vac_severity_time", "BNT.I.BNT_mild_D51", "H_naive_vac_D0") (nao foi ???)

```

*H_I* 
```{r}
######### H_I

#H_I_D10
gse189039_H_I_moderate_D10 = results(dds, contrast = c("disease_vac_severity_time", "H.I_moderate_D10", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-I (D10, moderate)",
         day = 10,
         vaccine = "H-I",
         severity = "moderate",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_I_moderate_D10, file = "gse189039_H_I_moderate_D10_DEGs.csv")

#H_I_D26
gse189039_H_I_moderate_D26 = results(dds, contrast = c("disease_vac_severity_time", "H.I_moderate_D26", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-I (D26, moderate)",
         day = 26,
         severity = "moderate",
         vaccine = "H-I",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_I_moderate_D26, file = "gse189039_H_I_moderate_D26_DEGs.csv")

#H_I_D51
gse189039_H_I_moderate_D51 = results(dds, contrast = c("disease_vac_severity_time", "H.I_moderate_D51", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-I (D51, moderate)",
         day = 51,
         vaccine = "H-I",
         severity = "moderate",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_I_moderate_D51, file = "gse189039_H_I_moderate_D51_DEGs.csv")

#H_I_severe_D10
gse189039_H_I_severe_D10 = results(dds, contrast = c("disease_vac_severity_time", "H.I_severe_D10", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-I (D10, severe)",
         day = 10,
         vaccine = "H-I",
         severity = "severe",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_I_severe_D10, file = "gse189039_H_I_severe_D10_DEGs.csv")



#H_I_severe_D26
gse189039_H_I_severe_D26 = results(dds, contrast = c("disease_vac_severity_time", "H.I_severe_D26", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-I (D26, severe)",
         day = 26,
         vaccine = "H-I",
         severity = "severe",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_I_severe_D26, file = "gse189039_H_I_severe_D26_DEGs.csv")

#H_I_severe_D51
gse189039_H_I_severe_D51 = results(dds, contrast = c("disease_vac_severity_time", "H.I_severe_D51", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-I (D51, severe)",
         day = 51,
         vaccine = "H-I",
         severity = "severe",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_I_severe_D51, file = "gse189039_H_I_severe_D51_DEGs.csv")

gse189039_H_I = bind_rows(gse189039_H_I_moderate_D10,
                          gse189039_H_I_severe_D10,
                            gse189039_H_I_moderate_D26,
                          gse189039_H_I_severe_D26,
                            gse189039_H_I_moderate_D51,
                          gse189039_H_I_severe_D51)

write.csv(gse189039_H_I, "gse189039_H_I_DEGS.csv")

```


*H_BNT_I* 
```{r}
######### H-BNT-I

#H_BNT-I

#MILD
gse189039_H_BNT_I_mild_D10 = results(dds, contrast = c("disease_vac_severity_time", "H.BNT.I_mild_D10", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-BNT-I (D10, mild)",
         day = 10,
         vaccine = "H-BNT-I",
         severity = "mild",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_BNT_I_mild_D10, file = "gse189039_H_BNT_I_mild_D10_DEGs.csv")


gse189039_H_BNT_I_mild_D26 = results(dds, contrast = c("disease_vac_severity_time", "H.BNT.I_mild_D26", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-BNT-I (D26, mild)",
         day = 26,
         vaccine = "H-BNT-I",
         severity = "mild",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_BNT_I_mild_D26, file = "gse189039_H_BNT_I_mild_D26_DEGs.csv")


#SEVERE

gse189039_H_BNT_I_severe_D10 = results(dds, contrast = c("disease_vac_severity_time", "H.BNT.I_severe_D10", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-BNT-I (D10, severe)",
         day = 10,
         vaccine = "H-BNT-I",
         severity = "severe",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_BNT_I_severe_D10, file = "gse189039_H_BNT_I_severe_D10_DEGs.csv")


gse189039_H_BNT_I_severe_D26 = results(dds, contrast = c("disease_vac_severity_time", "H.BNT.I_severe_D26", "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "H-BNT-I (D26, severe)",
         day = 26,
         vaccine = "H-BNT-I",
         severity = "severe",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_H_BNT_I_severe_D26, file = "gse189039_H_BNT_I_severe_D26_DEGs.csv")

#UNIR
gse189039_H_BNT_I = bind_rows(gse189039_H_BNT_I_mild_D10,
                              gse189039_H_BNT_I_severe_D10,
                              gse189039_H_BNT_I_mild_D26,
                              gse189039_H_BNT_I_severe_D26)
#Salvar
write.csv(gse189039_H_BNT_I, file = "gse189039_H_BNT_I_DEGS.csv")

```


*BNT-I-BNT*
```{r}
######### BNT-I-BNT	

# D51
gse189039_BNT_I_BNT_severe_D51 = results(dds, contrast = c("disease_vac_severity_time",
                                                         "BNT.I.BNT_severe_D51", 
                                                         "H_naive_vac_D0")) %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  arrange(padj) %>% 
  mutate(condition = "BNT-I-BNT (D51, severe)",
         day = 51,
         vaccine = "BNT-I-BNT",
         severity = "severe",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes")

#Salvar tabela
write.csv(gse189039_BNT_I_BNT_severe_D51, file = "gse189039_BNT_I_BNT_severe_D51_DEGs.csv")


#Aqui tive de usar o argumento name, pois contrast não estava funcionando (?)
gse189039_BNT_I_BNT_mild_D51 = results(dds, name= "disease_vac_severity_time_H_naive_vac_D0_vs_BNT.I.BNT_mild_D51") %>% 
  as.data.frame() %>% 
  filter(padj < 0.05) %>%
  mutate(log2FoldChange = ifelse(log2FoldChange > 0, -log2FoldChange, abs(log2FoldChange))) %>% 
  arrange(padj) %>% 
  mutate(condition = "BNT-I-BNT (D51, mild)",
         day = 51,
         vaccine = "BNT-I-BNT",
         severity = "mild",
         direction = ifelse(log2FoldChange >= 1, "UP", 
                            ifelse(log2FoldChange <= -1, "DOWN", 
                                   "NEUTRAL")))  %>% 
  rownames_to_column("genes") 

#Salvar tabela
write.csv(gse189039_BNT_I_BNT_mild_D51, file = "gse189039_BNT_I_BNT_mild_D51_DEGs.csv")

#Unir
gse189039_BNT_I_BNT = bind_rows(gse189039_BNT_I_BNT_mild_D51,
                                gse189039_BNT_I_BNT_severe_D51)


write.csv(gse189039_BNT_I_BNT, file = "gse189039_BNT_I_BNT_DEGS.csv")

```

*UNIR*
```{r}
gse189039_DEGs = bind_rows(gse189039_BNT_I_BNT,
                           gse189039_H_I,
                           gse189039_H_BNT_I) %>% 
  mutate(study = "GSE189039")



##### TODAS AS DEGS

gse_201530_189039 = bind_rows(gse201530_DEGs,
                              gse189039_DEGs) %>% 
  select(genes:condition, direction) %>% 
  merge(ann_vaccines, by = "condition", all.y = F) %>% 
  clean_names()

degs_allstudies_updown_p_05 = degs_allstudies_updown_p_05 %>% 
  clean_names() %>% 
  select(genes:direction, -vaccine, -gse_id) %>% 
  merge(ann_vaccines, by = "condition", all.y = F) %>% 
  select(-participants)


all_degs_p_05_vac_infected = bind_rows(degs_allstudies_updown_p_05,
                                       gse_201530_189039) %>% 
  mutate_all(~ replace(., is.na(.), "NA")) %>% 
  mutate(regimen = case_when(
regimen == "BNT-I-BNT" ~ "V-I-V",
regimen == "H-BNT-I" ~ "V-I",
regimen == "VAC-I" ~ "V-I",
regimen == "I-VAC-I" ~ "I-V-I",
regimen == "H-V-I" ~ "V-I",
regimen == "H-I-I" ~ "I-I",
regimen == "H-I" ~ "I",
TRUE ~ as.character(regimen)))

#Salvar
write.csv(gse_201530_189039, file = "gse_201530_189039_DEGs_27-11-23.csv")

#Salvar
write.csv(all_degs_p_05_vac_infected, file = 'all_degs_p_05_vac_infected_27-11-23.csv')
```

Comparar numero de participantes
```{r}

ann_vaccines = ann_vaccines_29_11_23 %>% 
  clean_names() %>% 
  mutate(condition = factor(condition, levels = rev(condition)))


plot = ann_vaccines %>% 
  ggplot() +
  aes(x = condition, 
      y = participants, 
      fill = gse_id) +
  geom_col() +
  labs(title = "Participants by condition") +
  coord_flip() +
  theme_minimal() +
  ylim(0, 70) +
  scale_fill_manual(
    values = c("GSE199750" = "#80ffdb",
               "GSE201533" = "#5e60ce", 
               "GSE201530" = "#7400b8",
               "GSE206023" = "#4361ee",
               "GSE189039" = "#48bfe3")) +
  geom_text(aes(label = participants, y = participants), 
            position = position_dodge(width = 0.9), 
            vjust = 0.5, 
            hjust = -0.2, 
            size = 3)

ggsave(plot, file = "Samples_per_condition_by_GSE.png", width = 6, height = 10)
ggsave(plot, file = "Samples_per_condition_by_GSE_2.png", width = 10, height = 6)

```

Comparar sexo
```{r}

esquisser(Demographics)

Sex = Demographics %>%
 filter(!(Sub %in% "Mean")) %>%
 ggplot() +
  aes(x = Study, y = Value, fill = Sub) +
  geom_col() +
  scale_fill_manual(
    values = c(Female = "#BA27FF",
    Male = "#1EC3FF")) +
  theme_minimal() +
  facet_wrap(vars(Category)) +
  geom_text(aes(label = Value, y = Value), 
            vjust = 2, 
            hjust = 0.5, 
            size = 4) +
  xlab("GSE ID") +
  ylab("Percentage") +
  labs(title = "Sex",
       fill = "Sex")

ggsave(Sex, file = "Demographics_Sex_1.png", width = 10, height = 6)
ggsave(Sex, file = "Demographics_Sex_2.png", width = 6, height = 10)



Age = Demographics %>%
 filter(!(Category %in% "Sex")) %>%
 ggplot() +
  aes(x = Study, y = Value, fill = Sub) +
  geom_col() +
  scale_fill_manual(
    values = c(Mean = "#1EC3FF")) +
  theme_minimal() +
  facet_wrap(vars(Category)) +
  geom_text(aes(label = Value, y = Value), 
            vjust = 2, 
            hjust = 0.5, 
            size = 4) +
  xlab("GSE ID") +
  ylab("Age") +
  labs(fill = "Age")

ggsave(Age, file = "Demographics_Age_1.png", width = 10, height = 6)
ggsave(Age, file = "Demographics_Age_2.png", width = 6, height = 10)

```


```{r}
ann_vaccines = ann_vaccines_29_11_23 %>% 
  clean_names() %>% 
  mutate(condition = factor(condition, levels = rev(condition)))

plot = ann_vaccines %>% 
  ggplot() +
  aes(x = condition, 
      y = participants, 
      fill = gse_id) +
  geom_col() +
  labs(title = "Participants by condition") +
  coord_flip() +
  theme_minimal() +
  ylim(0, 70) +
  scale_fill_manual(
    values = c("GSE199750" = "#80ffdb",
               "GSE201533" = "#5e60ce", 
               "GSE201530" = "#7400b8",
               "GSE206023" = "#4361ee",
               "GSE189039" = "#48bfe3")) +
  geom_text(aes(label = participants, y = participants), 
            position = position_dodge(width = 0.9), 
            vjust = 0.5, 
            hjust = -0.2, 
            size = 3)

ggsave(plot, file = "Samples_per_condition_by_GSE.png", width = 6, height = 10)
ggsave(plot, file = "Samples_per_condition_by_GSE_2.png", width = 10, height = 6)


```



Combinar tabela de genes sequenciados (raw)

```{r}
#TABELA DE GENES SEQUENCIADOS

gse201530_genes = gse201530_counts_ready %>%
  rownames_to_column("genes") %>% 
  select(genes) %>% 
  mutate(study = "GSE201530")

gse199750_genes = gse199750_counts_ready %>%
  as.data.frame() %>% 
  rownames_to_column("genes") %>% 
  select(genes) %>% 
  mutate(study = "GSE199750")

gse189039_genes = gse189039_counts_ready %>%
  as.data.frame() %>% 
  rownames_to_column("genes") %>% 
  select(genes) %>% 
  mutate(study = "GSE189039")

gse201533_genes = gse201533_counts_ready %>%
  as.data.frame() %>% 
  rownames_to_column("genes") %>% 
  select(genes) %>% 
  mutate(study = "GSE201533")

gse206023_genes = gse206023_counts_long_annotated_14_11_23 %>% 
  select(genes) %>% 
  distinct() %>% 
  mutate(study = "GSE206023")


genes_raw_combined = bind_rows(gse206023_genes,
                               gse201533_genes,
                               gse189039_genes,
                               gse199750_genes,
                               gse201530_genes)


write.csv(genes_raw_combined, file = "genes_raw_combined.csv")

```


# 5. Visualização de dados

## DEGs plot

```{r}

ann_vaccines = ann_vaccines_27_11_23 %>% clean_names()

all_degs = all_degs_p_05_vac_infected %>% 
  group_by(condition, direction) %>% 
  summarise(DEGs = n()) %>% 
  merge(ann_vaccines, by="condition", all.y = F)

all_degs_wide = all_degs %>% 
  pivot_wider(names_from = "direction", values_from = "DEGs") %>% 
  mutate(TOTAL = sum(DOWN + NEUTRAL + UP)) %>% 
  merge(ann_vaccines, by="condition", all.y = F)

write.csv(all_degs_wide, file = "all_degs_wide.csv")



# Plot 1
ggplot_degs_bars = all_degs %>%
 filter(!(direction %in% "NEUTRAL")) %>%
 ggplot() +
  aes(x = reorder(condition, day), fill = direction, weight = DEGs) +
  geom_bar(position = "dodge") +
  scale_fill_manual(
    values = c(DOWN = "#0E252D",
    NEUTRAL = "#00C19F",
    UP = "#3399FF")) +
  theme_minimal() +
  facet_wrap(vars(vac_infec), scales = "free", nrow = 2) +
  geom_text(aes(label = DEGs, y = DEGs), 
            position = position_dodge(width = 0.9), 
            vjust = -1, 
            hjust = 0.5, 
            size = 2) +
  theme(legend.position = "top",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1, size = 7),
        axis.text.y = element_text(vjust = 2)) +
  ggtitle("DEGs - all studies, p<0.05")

ggsave(ggplot_degs_bars, file = "DEGs_up_down_barplot_pvalue05_1.png",  width = 10, height = 6) 


esquisser(all_degs)

#Plot 2
ggplot_degs_bars_2 = all_degs %>%
 filter(!(direction %in% "NEUTRAL")) %>%
 ggplot(aes(x = condition, fill = direction, weight = DEGs)) +
  geom_bar(position = "dodge") +
  scale_fill_manual(
    values = c(DOWN = "#0E252D",
    NEUTRAL = "#00C19F",
    UP = "#3399FF")) +
  theme_minimal() +
  facet_wrap(vars(vaccine), scales = "free", nrow = 2) +
  geom_text(aes(label = DEGs, y = DEGs), 
            position = position_dodge(width = 0.9), 
            vjust = -1, 
            hjust = 0.5, 
            size = 2) +
  theme(legend.position = "top",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1, size = 7),
        axis.text.y = element_text(vjust = 2)) +
  ggtitle("DEGs - all studies, p<0.05")

ggsave(ggplot_degs_bars_2, file = "DEGs_up_down_barplot_pvalue05_2.png",  width = 10, height = 6) 



#Plot 3
ggplot_degs_bars_3 = all_degs %>%
 filter(!(direction %in% "NEUTRAL")) %>%
 ggplot() +
  aes(x = condition, fill = direction, weight = DEGs) +
  geom_bar(position = "dodge") +
  scale_fill_manual(
    values = c(DOWN = "#0E252D",
    NEUTRAL = "#00C19F",
    UP = "#3399FF")) +
  theme_minimal() +
  facet_wrap(vars(vaccine), scales = "free", nrow = 7) +
  geom_text(aes(label = DEGs, y = DEGs), 
            position = position_dodge(width = 0.9), 
            vjust = -1, 
            hjust = 0.5, 
            size = 2) +
  theme(legend.position = "top",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1, size = 7),
        axis.text.y = element_text(vjust = 2)) +
  ggtitle("DEGs - all studies, p<0.05")

ggsave(ggplot_degs_bars_3, file = "DEGs_up_down_barplot_pvalue05_3.png",  width = 5, height = 20)


#Plot 4
ggplot_degs_bars_4 = all_degs %>%
 filter(direction !="NEUTRAL",
        vac_infec != "vaccine") %>%
 ggplot() +
  aes(x = condition, fill = direction, weight = DEGs) +
  geom_bar(position = "dodge") +
  scale_fill_manual(
    values = c(DOWN = "#0E252D",
    NEUTRAL = "#00C19F",
    UP = "#3399FF")) +
  theme_minimal() +
  facet_grid(vars(prior_infection), vars(previous_vaccination), scales = "free") +
  geom_text(aes(label = DEGs, y = DEGs), 
            position = position_dodge(width = 0.9), 
            vjust = -1, 
            hjust = 0.5, 
            size = 2) +
  theme(legend.position = "top",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust=1, size = 7),
        axis.text.y = element_text(vjust = 2)) +
  ggtitle("DEGs - all studies, p<0.05")

ggsave(ggplot_degs_bars_4, file = "DEGs_up_down_barplot_pvalue05_4.png",  width = 10, height = 6)

```

## 5.1. Volcanoplot

Os volcanoplots das duas vacinas são os mesmos, mas espelhados. Isso significa que definir o nivel de referencia no fator de vacinas é importante para determinar as DEGs up e down. \#### ASTRAZENECA

*Astrazeneca* 

```{r}

#All
volcano_res <- EnhancedVolcano(res, 
                                    lab = rownames(res),
                                    x = 'log2FoldChange',
                                    y = 'pvalue',
                                    title='Differential expression of genes - All timepoints - ChAdOx1')
# Save the plot as a PNG file
png("gse199750_volcanoplot_DEGs_allfactores_AZ.png", width = 800, height = 600)
print(volcano_res_chad)
dev.off()

#T1-T0
volcano_res_chad_1_0 <- EnhancedVolcano(res_1_0_chad, 
                                    lab = rownames(res_1_0_chad),
                                    x = 'log2FoldChange',
                                    y = 'pvalue',
                                    title='Differential expression of genes - D1 vs. D0 - ChAdOx1')
# Save the plot as a PNG file
png("gse199750_volcanoplot_DEGs_D0-D1_AZ.png", width = 800, height = 600)
print(volcano_res_chad_1_0)
dev.off()

#T2-T0
volcano_res_chad_2_0 <- EnhancedVolcano(res_time0_time2_chad, 
                                    lab = rownames(res_time0_time2_chad),
                                    x = 'log2FoldChange',
                                    y = 'pvalue',
                                    title='Differential expression of genes - D2 vs. D0 - ChAdOx1')
# Save the plot as a PNG file
png("gse199750_volcanoplot_DEGs_D0-D2_AZ.png", width = 800, height = 600)
print(volcano_res_chad_2_0)
dev.off()

#Plotar mais de um gráfico
plot_grid(volcano_res_chad, volcano_res_chad_1_0, volcano_res_chad_2_0, ncol = 3) 

```

## 5.2. Heatmap

```{r}
# teste heatmap
head(res)

#Obter DEGs do estudo
heat_test = res %>% 
  as.data.frame() %>% 
  rownames_to_column(var="genes")

#Obter counts dos DEGs
heat_count = countData %>% 
  rownames_to_column(var = "genes")

#Unir tabelas
###A tabela agora possui os valores de counts e outras estatísticas do objeto deseq2 pareadas pelos DEGs.
merged_heat = heat_test %>% 
  merge(heat_count, by = 'genes') %>% 
  arrange(padj)

#Cores
tp_cols = c("V0" = "steelblue2", "V1" = "goldenrod","V2A"="firebrick3","V3A"="grey44")
v_cols = c("ChAdOx1" = "violetred2", "BNT162b2" = "deepskyblue2","mRNA-1273" = "goldenrod")

# Transformar as counts dos DEGs em matriz e normalizar 
data2plot <- as.matrix(merged_heat[, 8:267])
zzz <- scale(t(data2plot)) %>%
  pmin(-3) %>%
  pmax(3))

#Transformar em dataframe
test <- zzz %>%
  as.data.frame() %>%
  mutate(genes = merged_heat$genes) %>%
  tidyr::gather("samples", "expression", -genes)
tail(test)

zzz_filtered = test %>% 
  subset(expression > 2 & samples == 'COVIRS_41_V0')
dim(zzz_filtered)
head(zzz_filtered)

#Heatmap annotation
ha = HeatmapAnnotation(df = colData[,c("timepoint","first_second_dosef")],
                       col = list(Timepoint = tp_cols,
                                  FirstSecondDosef = v_cols))

Heatmap(zzz, 
        show_row_names = FALSE,
        show_column_names = FALSE,
        cluster_rows=TRUE,
        cluster_columns = TRUE,
        top_annotation = ha,
        col=c("darkgreen","azure2","darkorchid4"),
        column_split=colData$Timepoint,
        column_title="Heatplot")

```

## 5.3. Diagrama de Venn com Heatmap

*Tabela de overlaps de DEGs com teste exato de fisher*

Determinar a proporção de DEGs compartilhadas entre condições diferentes e sua significância. O código abaixo deve ter como input uma tabela longa chamada "degs_all" (coluna 1: condicoes/vacinas, coluna 2: DEGs). O output será uma tabela longa com as colunas Vacina 1, Vacina 2, shared degs, not shared vacina 1, not shared vacina 2, lista com nomes das degs, valor p de fisher (teste exato de fisher).




```{r}

#Converter tabela de DEGs Up and Down em tabela longa
degs_all_updown = degs_updown_p_05_vac_infected %>% 
  filter(direction != "NEUTRAL") %>% 
  mutate_all(~ replace(., is.na(.), "NA"))

#Salvar
write.csv(degs_all_updown, file= "degs_updown_p<05_vac_infected.csv")

# Tabela de dados

#Up vs Up
degs_venn_up = degs_all_updown %>% 
  filter(direction == "UP") %>%
  mutate(comparison = "UP-UP") %>% 
  select(condition, genes)

#Down vs Down
degs_venn_down = degs_all_updown %>% 
  filter(direction == "DOWN") %>%
  mutate(comparison = "DOWN-DOWN") %>% 
  select(condition, genes)


# DOWN-UP ALL
degs_venn_updown = degs_all_updown %>% 
  filter(direction %in% c("UP", "DOWN")) %>%
  mutate(comparison = "UP_AND_DOWN") %>% 
  select(condition, genes)

#Up vs Down
degs_venn_up_vs_down = degs_all_updown %>% 
  mutate(comparison = "UP_vs_DOWN") %>% 
  select(condition, genes)

```


```{r}
#Input
data = degs_venn_down
filename = "degs_venn_down"

# Função para calcular a sobreposição entre pares de vacinas e a porcentagem de genes compartilhados
overlap_genes <- function(cond1, cond2, data) {
  genes_cond1 <- data$genes[data$condition == cond1] #Trocar coluna
  genes_cond2 <- data$genes[data$condition == cond2] #Trocar coluna
  genes_shared <- intersect(genes_cond1, genes_cond2)
  genes_notshared_cond1 <- setdiff(genes_cond1, genes_cond2)
  genes_notshared_cond2 <- setdiff(genes_cond2, genes_cond1)
  
  total_genes_cond1 <- length(genes_cond1)
  total_genes_cond2 <- length(genes_cond2)
  
  percentage_shared_cond1 <- length(genes_shared) / total_genes_cond1 * 100
  percentage_shared_cond2 <- length(genes_shared) / total_genes_cond2 * 100
  
  shared_genes <- data.frame(
    Cond1 = cond1,
    Cond2 = cond2,
    Shared = length(genes_shared),
    NotShared_cond1 = length(genes_notshared_cond1),
    NotShared_cond2 = length(genes_notshared_cond2),
    Total_Genes_Cond1 = total_genes_cond1,
    Total_Genes_Cond2 = total_genes_cond2,
    Genes_Names = paste(genes_shared, collapse = ", "),
    Percentage_Shared_Cond1 = percentage_shared_cond1,
    Percentage_Shared_Cond2 = percentage_shared_cond2
  )
  
  return(shared_genes)
}
# Obtenha uma lista de todas as vacinas únicas
unique_cond <- unique(data$condition)  #trocar "study" pelo nome da coluna de interesse

# Inicialize um dataframe para armazenar os resultados
shared_genes_df <- data.frame()

# Calcular a sobreposição e porcentagem para cada par de vacinas
for (i in 1:(length(unique_cond) - 1)) {
  for (j in (i + 1):length(unique_cond)) {
    resultado_temp <- overlap_genes(unique_cond[i], unique_cond[j], data)
    shared_genes_df <- bind_rows(shared_genes_df, resultado_temp)
  }
}

# Função para realizar o teste exato de Fisher
fisher_exact_test <- function(shared, notshared1, notshared2) {
  cont_table <- matrix(c(shared, notshared1, notshared2, 0), nrow = 2)
  results_fisher <- fisher.test(cont_table)
  return(results_fisher$p.value)
}

# Aplicar a função a cada linha da tabela de resultados
shared_genes_df$pvalue <- mapply(fisher_exact_test, 
                                    shared_genes_df$Shared, 
                                    shared_genes_df$NotShared_cond1, 
                                    shared_genes_df$NotShared_cond2)


# Calcular qvalue (BH) e -log(FDR)
shared_genes_df <- shared_genes_df %>%
  mutate(qvalue = qvalue(pvalue)$qvalues)

# Calcular -log(q)
shared_genes_df$`log_q`= -log(shared_genes_df$qvalue, 10)


#Duplicar e trocar colunas (Vacina 1 e Vacina 2)
shared_genes_df2 = shared_genes_df
shared_genes_df2[, c("Cond1", "Cond2")] <- shared_genes_df2[, c("Cond2", "Cond1")]

#Unir datasets com os dados espelhados
shared_genes_df_mirror = rbind(shared_genes_df, shared_genes_df2)

#Converter NA, NaNe inf em 0
shared_genes_df_mirror[is.na(shared_genes_df_mirror)] <- 0
shared_genes_df_mirror<- replace(shared_genes_df_mirror, shared_genes_df_mirror == "Inf", 0)

#Arredondar porcentagens
shared_genes_df_mirror = shared_genes_df_mirror %>%
  mutate(Percentage_Shared_Cond1 = round(Percentage_Shared_Cond1, 2),
         Percentage_Shared_Cond2 = round(Percentage_Shared_Cond2, 2))

#Salvar resultados
write.csv(shared_genes_df_mirror, file = paste0("shared_", filename, "_fisher_pvalue10.csv")) #Alterar

#Filtrar
shared_genes_df_mirror_q10 = shared_genes_df_mirror %>% 
  filter(qvalue < 0.10)
write.csv(shared_genes_df_mirror_q10, file = paste0("shared_", filename, "_fisher_p<10_qvalue10.csv"))

```


*pHeatmap*

```{r}
#Matriz
shared_heatmap = shared_genes_df_mirror_q10 %>% 
  dcast(Cond1 ~ Cond2, 
        value.var = "Percentage_Shared_Cond1", 
        fun.aggregate = mean) %>% 
  #replace(is.na(.), 0) %>% 
  column_to_rownames("Cond1") %>% 
  as.matrix()

#Anotar colunas
shared_heatmap_ann = shared_heatmap %>% 
  as.data.frame() %>% 
  rownames_to_column("condition") %>% 
  select(condition)

ann_vaccines_pheatmap = degs_all_updown %>% 
  select(condition, vaccine:variant, -type, -gse_id, -vac_infec) %>% 
  distinct() %>% 
  mutate(day = as.factor(day),
         dose = as.factor(dose),
         infection = as.factor(infection),
         variant = as.factor(variant),
         regimen = as.factor(regimen),
         severity = as.factor(severity)) %>% 
  merge(shared_heatmap_ann, by = "condition") %>% # match numero de colunas/linhas da matriz 
  column_to_rownames("condition")



#Plotar
pheatmap_shared = pheatmap(shared_heatmap,
         main= paste0("Shared %", filename, "_q<0.10_rows_vs_cols"),
         color = rev(hcl.colors(50, "Reds 3")),
         breaks = c(seq(0, 10, length.out = 0),  # Low values
                    seq(10, 50, length.out = 50), # Mid values
                    seq(50, 100, length.out = 50)), # High values,, 
         cluster_rows = T,
         cluster_cols = T,
         cutree_rows = 6,
         cutree_cols = 6,
         treeheight_row = 0, #altura do dendrograma, 0 para não mostrar
         treeheight_col = 0,
         display_numbers = T,
         number_color = "black",
         fontsize_number = 4,
         fontsize_row = 6, 
         fontsize_col = 6,
         na_col = "black",
         show_colnames = T,
         border_color = "NA",
         annotation_col = ann_vaccines_pheatmap,
         annotation_row = ann_vaccines_pheatmap,
         annotation_colors = ann_colors,
         annotation_legend = F)

#Salvar imagem
png(file= paste0("shared", filename, "_p<10_qvalue10_PHEATMAP_numbers.png"), 
    width=3000, 
    height=2000, 
    res=315, 
    pointsize=10)
print(pheatmap_shared)
dev.off()
```


```{r}
#Plotar
pheatmap_shared_nonumbers = pheatmap(shared_heatmap,
         main= paste0("Shared %", filename, "_q<0.10_rows_vs_cols"),
         color = rev(hcl.colors(50, "Reds 3")),
         breaks = c(seq(0, 10, length.out = 0),  # Low values
                    seq(10, 50, length.out = 50), # Mid values
                    seq(50, 100, length.out = 50)), # High values,, 
         cluster_rows = T,
         cluster_cols = T,
         cutree_rows = 5,
         cutree_cols = 5,
         treeheight_row = 0, #altura do dendrograma, 0 para não mostrar
         treeheight_col = 0,
         display_numbers = F,
         number_color = "black",
         fontsize_number = 4,
         fontsize_row = 6, 
         fontsize_col = 6,
         na_col = "black",
         show_colnames = T,
         border_color = "NA",
         annotation_col = ann_vaccines_pheatmap,
         annotation_row = ann_vaccines_pheatmap,
         annotation_colors = ann_colors,
         annotation_legend = F)

#Salvar imagem
png(file= paste0("shared", filename, "_p<10_qvalue10_PHEATMAP_nonumbers.png"), 
    width=3000, 
    height=2000, 
    res=315, 
    pointsize=10)
print(pheatmap_shared_nonumbers)
dev.off()
```


*Chord diagram*

```{r}
#Chord diagram
install.packages("circlize")
library(circlize)

# sharedDEGs_UPDOWN_fisher_pvalue10
# sharedDEGs_UP_fisher_p_10_pvalue10
# sharedDEGs_DOWN_fisher_p_10_pvalue10

#Input
shared_genes_df_mirror_p10 = sharedDEGs_UP_fisher_p_10_pvalue10


#All doses
circos_alldoses = shared_genes_df_mirror_p10 %>% 
  select(Cond1, Cond2, Shared) %>% 
  mutate(Shared = as.numeric(Shared)) %>%  
  merge(ann_vaccines, by.x = "Cond1", by.y="Condition", all.x=T, all.y=F) %>%
  merge(ann_vaccines, by.x = "Cond2", by.y="Condition", all.x=T, all.y=F) %>% 
  select(Cond1, Cond2, Shared,Dose.x, Dose.y) %>% 
  rename(cond1_dose = Dose.x,
         cond2_dose = Dose.y) %>% 
  filter(Shared != 0)

circos_alldoses_mirror = circos_alldoses %>% 
  mutate(Cond1A = Cond2,
         Cond2A = Cond1) %>% 
  select(Cond1A, Cond2A, Shared, cond1_dose, cond2_dose) %>% 
  rename(Cond1 = Cond1A,
         Cond2 = Cond2A) %>% 
  rbind(circos_alldoses)

write.csv(circos_alldoses_mirror, file = "Circos_DOWN_Alldoses_pvalue10_dose1.csv")


#Dose 1
circos_dose1 = shared_genes_df_mirror_p10 %>% 
  select(Cond1, Cond2, Shared) %>% 
  mutate(Shared = as.numeric(Shared)) %>%  
  merge(ann_vaccines, by.x = "Cond1", by.y="Condition", all.x=T, all.y=F) %>%
  merge(ann_vaccines, by.x = "Cond2", by.y="Condition", all.x=T, all.y=F) %>% 
  select(Cond1, Cond2, Shared,Dose.x, Dose.y) %>% 
  rename(cond1_dose = Dose.x,
         cond2_dose = Dose.y) %>% 
  filter(cond1_dose == 1,
         cond2_dose == 1)

circos_dose1_mirror = circos_dose1 %>% 
  mutate(Cond1A = Cond2,
         Cond2A = Cond1) %>% 
  select(Cond1A, Cond2A, Shared, cond1_dose, cond2_dose) %>% 
  rename(Cond1 = Cond1A,
         Cond2 = Cond2A) %>% 
  rbind(circos_dose1)

write.csv(circos_dose1_mirror, file = "Circos_DOWN_pvalue10_dose1.csv")

#Dose 2
circos_dose2 = shared_genes_df_mirror_p10 %>% 
  select(Cond1, Cond2, Shared) %>% 
  mutate(Shared = as.numeric(Shared)) %>%  
  merge(ann_vaccines, by.x = "Cond1", by.y="Condition", all.x=T, all.y=F) %>%
  merge(ann_vaccines, by.x = "Cond2", by.y="Condition", all.x=T, all.y=F) %>% 
  select(Cond1, Cond2, Shared,Dose.x, Dose.y) %>% 
  rename(cond1_dose = Dose.x,
         cond2_dose = Dose.y) %>% 
  filter(cond1_dose == 2,
         cond2_dose == 2)
circos_dose2_mirror = circos_dose2 %>% 
  mutate(Cond1A = Cond2,
         Cond2A = Cond1) %>% 
  select(Cond1A, Cond2A, Shared, cond1_dose, cond2_dose) %>% 
  rename(Cond1 = Cond1A,
         Cond2 = Cond2A) %>% 
  rbind(circos_dose2) 

write.csv(circos_dose2_mirror, file = "Circos_DOWN_pvalue10_dose2.csv")

#Dose 3
circos_dose3 = shared_genes_df_mirror_p10 %>% 
  select(Cond1, Cond2, Shared) %>% 
  mutate(Shared = as.numeric(Shared)) %>%  
  merge(ann_vaccines, by.x = "Cond1", by.y="Condition", all.x=T, all.y=F) %>%
  merge(ann_vaccines, by.x = "Cond2", by.y="Condition", all.x=T, all.y=F) %>% 
  select(Cond1, Cond2, Shared,Dose.x, Dose.y) %>% 
  rename(cond1_dose = Dose.x,
         cond2_dose = Dose.y) %>% 
  filter(cond1_dose == 3,
         cond2_dose == 3)

 circos_dose3_mirror = circos_dose3 %>% 
  mutate(Cond1A = Cond2,
         Cond2A = Cond1) %>% 
  select(Cond1A, Cond2A, Shared, cond1_dose, cond2_dose) %>% 
  rename(Cond1 = Cond1A,
         Cond2 = Cond2A) %>% 
  rbind(circos_dose3) 

write.csv(circos_dose3_mirror, file = "Circos_DOWN_pvalue10_dose3.csv")

```

### Visualização

Dotplot

```{r}
#### Identidade e -log(q)
install.packages("ggdendro")
library(ggdendro)
library(cowplot)
library(ggtree)
library(patchwork) 

#Filtrar valores com -log(p) >= 1
sharedDEGs_UP_DOWN_fisher_filtered <- sharedDEGs_UP_DOWN_fisher %>%
  mutate(`Significance -log(p)` = cut(`-log(p)`, 
                        breaks = c(-Inf, 1, 1.3, 2, Inf), 
                        labels = c("<1", "1-1.3", "1.3-2", ">2"))) %>%
  filter(Percentage_Shared_Cond1 >= 1, Percentage_Shared_Cond2 >= 1, `Significance -log(p)` != "<1")

#Salvar
write.csv(sharedDEGs_UP_DOWN_fisher_filtered, file = "sharedDEGs_UP_DOWN_fisher_filtered.csv")

#Visualizar
plot = ggplot(sharedDEGs_UP_DOWN_fisher, aes(x = Cond1, y = Cond2, color = Percentage_Shared_Cond1, size = Percentage_Shared_Cond1)) + 
  geom_point() +
  scale_size_continuous(range = c(2, 8)) +
  geom_text(aes(label = sprintf("%.f", Percentage_Shared_Cond1)), vjust = 0.5, hjust = 0.5, size = 2, color = "black")  +
  cowplot::theme_cowplot() + 
  theme(axis.line = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 10),
        axis.text.y = element_text(size = 10)) +
  scale_color_gradient(low = "white", high = "#ef233c", limits = c(0, 60)) +
  ggtitle("%Identity between vaccines, Up and Down-Regulated, pvalue < 0.05" ) +
  ylab('Cond2') +
  theme(axis.ticks = element_blank()) +
  guides(color = FALSE, size = FALSE)

#Salvar
ggsave(plot, file = "sharedDEGs_UP_DOWN_fisher.png")

#Display
plot
```






# Single Sample GSEA (ssGSEA)
## ImmuneGO
```{r}
library(corto)
############## Arquivos necessários

#CellMarker_ImmuneCells.csv 
#ImmuneGO_Annotated_Genes_Nested_Interesting.csv
ann_vaccines #Anotação das vacinas

############## GSE199750
gse199750_metadata_annotated	= gse199750_metadata_annotated_16_11_23 %>% 
  mutate(condition = case_when(
    condition == "BNT (V1, D6)BNT" ~ "BNT (V1, D6)",
    condition == "BNT (V1, D6)MO" ~ "BNT-MO (V1, D6)",
    TRUE ~ condition)) %>% 
  select(sample, condition, geo, day, dose, age, sex)

#Salvar
write.csv(gse199750_metadata_annotated, file = "gse199750_metadata_annotated_22-11-23.csv")

gse199750_counts_ready #matriz de expressão


############## GSE201533

gse201533_metadata_annotated = gse201533_metadata_long_annotated_14_11_23 %>% #Metadados
  select(condition:vaccine)

gse201533_counts_ready #Matriz de expressão


############## GSE206023
gse206023_metadata_ready = gse206023_counts_long_annotated_14_11_23 %>%  #Metadados
  select(condition:age) %>% 
  distinct() %>% 
  mutate(type = if_else(type=="HO", "IN", type))

write.csv(gse206023_metadata_ready, file = "gse206023_metadata_ready.csv")

#Matriz de expressão
gse206023_counts_ready = gse206023_counts_long_annotated_14_11_23 %>% 
  mutate(counts = as.numeric(counts)) %>% 
  select(genes, sample, counts) %>% 
  pivot_wider(names_from = sample, values_from = counts, values_fn = mean) %>% 
  column_to_rownames("genes")

write.csv(gse206023_counts_ready, file = "gse206023_counts_ready.rds")

```


```{r}
############## Inputs

ssgsea_gene = gse206023_counts_ready #Criar variável
metadata = gse206023_metadata_ready
filename = "GSE206023"
go_dataset = "CellMarker"

#Extrair sample names. O ssGSEA perde o nome no resultado.
sample_names = ssgsea_gene %>% 
  as.data.frame() %>% 
  colnames() %>% 
  as.data.frame() %>% 
  rename(sample_names = '.')

#Tabela com genes de interesse (geral) convertida em listas 

genelist = CellMarker_ImmuneCells %>% #CellMarker_ImmuneCells.csv
  select(cell = cell_name, genes = marker)
genelist = split(genelist$genes, genelist$cell) 

############## Análise 

# Run ssGSEA
nesmat = ssgsea(ssgsea_gene, genelist)

#Padronizar resultado
nesmat.result = nesmat %>% 
  as.data.frame() %>% 
  t() %>% 
  as.data.frame() %>% 
  rownames_to_column("sample") %>% 
  relocate(sample, .before = everything()) %>% 
  cbind(sample_names) %>% 
  select(-sample) %>% 
  column_to_rownames("sample_names") %>% 
  t() %>% 
  as.data.frame() %>% 
  rownames_to_column("process") %>% 
  relocate(process, .before=everything()) %>% 
  pivot_longer(cols = -process, names_to = "sample", values_to = "nes") %>% 
  mutate(pvalue = z2p(nes), #Converter NES em pvalue
         qvalue = p.adjust(pvalue, method = "BH"), #ajustar para qvalue 
         logq = - log10(qvalue)) %>%  #Calcular -log(q)
  merge(metadata, by="sample", all.x=T) %>% #Unir com metadata
  mutate(study = filename) %>% 
  merge(CellMarker_ImmuneCells, by.x ="process", by.y = "cell_name", all.x=T) %>% 
  select(-"...1") %>% 
  rename(cell_name = process)

#Salvar
write.csv(nesmat.result, file = paste0(filename, sep = "_", go_dataset, "_ssgsea_result.csv"))

print(nesmat.result)
```

*Unir resultados*

```{r}

############### Unir resultados
GSE199750_CellMarker_ssgsea_result = GSE199750_CellMarker_ssgsea_result %>%
  clean_names() %>% 
  select(cell_name:type, -marker) %>% 
  distinct()

GSE201533_CellMarker_ssgsea_result = GSE201533_CellMarker_ssgsea_result %>% 
  clean_names() %>% 
  select(cell_name:type, -marker) %>% 
  distinct()

GSE206023_CellMarker_ssgsea_result = GSE206023_CellMarker_ssgsea_result %>% 
  clean_names() %>% 
  select(cell_name:type, -marker) %>% 
  distinct() 
  
  
GSE206023_CellMarker_ssgsea_result = GSE206023_CellMarker_ssgsea_result %>% 
  mutate(age = as.double(age))

ssgsea_results_unified = bind_rows(GSE199750_CellMarker_ssgsea_result,
          GSE201533_CellMarker_ssgsea_result,
          GSE206023_CellMarker_ssgsea_result)

ssgsea_results_unified = ssgsea_results_unified %>% 
  select(cell_name:immune_system) %>% 
  merge(ann_vaccines, by.x = "condition", by.y = "Condition")

#Salvar
write.csv(ssgsea_results_unified, file = paste0(go_dataset, "_ssgsea_results_unified.csv"))

```


### Visualização

```{r}

# INPUT
nesmat.result = GSE199750_ssgsea_result
filename = "GSE199750"

#Filtrar

#Lista de processos

ssgsea_interesting = nesmat.result %>% 
  filter(pvalue <= 0.10) %>% 
  distinct()

#Boxplot (NES)
NES_plot = ggplot(ssgsea_interesting) +
  aes(x = condition, y = nes, fill = vaccine) +
  geom_boxplot() +
  scale_fill_hue(direction = 1) +
  theme_minimal() +
  facet_grid(vars(process), vars(vaccine), scales = "free_x") +
 labs(title = paste0(filename, "_ImmuneGO ssGSEA"), 
      fill = "vaccine") +
 theme_bw() +
 theme(plot.title = element_text(size = 20L, face = "bold", hjust = 0.5),
       axis.text.x = element_text(angle = 45, vjust = 1, hjust=1, size = 7)) 

#Salvar
ggsave(NES_plot, file= paste0(filename, "_ssGSEA_NES_qvalue010.png"), width = 10, height = 20)
```


```{r}
ssgsea_interesting.unified = ssgsea_results_unified %>% 
  filter(!condition %in% c("BNT-MO (V1, D0)", "BNT-MO (V2, D0)")) %>% 
  mutate(day = as.factor(day),
         dose = as.factor(dose))

ssgsea_interesting.general.unified = ssgsea_interesting.unified %>% 
  filter(process %in% c("ADAPTIVE IMMUNE SYSTEM", 
                        "CELLULAR ADAPTIVE IMMUNE SYSTEM", 
                        "INNATE IMMUNE SYSTEM", 
                        "INFLAMMATION"),
         pvalue <= 0.10)

ssgsea_interesting.specific.unified = ssgsea_interesting.unified %>% 
  filter(!process %in% c("ADAPTIVE IMMUNE SYSTEM", 
                         "CELLULAR ADAPTIVE IMMUNE SYSTEM", 
                         "INNATE IMMUNE SYSTEM", 
                         "INFLAMMATION"),
         pvalue <= 0.10)


data = ssgsea_results_unified
filename.unified = "ssgsea_results_unified"
```


```{r}
#All conditions divided by dose, time on x-axis

# Filtrar os dados antes de passar para ggplot
filtered_data <- data %>%
  group_by(cell_name, Vaccine, condition) %>%
  summarise(n = n()) %>%
  filter(n < 5) %>%
  pull(condition)


NES_plot_unified_dayx <- ggplot(data) +
  aes(x = day, y = nes, fill = Vaccine) +
  geom_boxplot(outlier.alpha = 1) +
  geom_point(data = subset(data, day %in% filtered_data),
             aes(x = day, y = nes, color = Vaccine),
             position = position_jitter(width = 0.2), alpha = 1) +
  scale_fill_manual(values = vaxcolors) +
  scale_color_manual(values = c(BBIBP = "#ffd000", 
                                ZF2001 = "#b5179e",
                                BNT = "#56cfe1",
                                ChAd = "#80ffdb" ,
                                "ChAd-BNT" = "#72efdd")) +
  labs(
    title = "ssGSEA - All studies",
    subtitle = "General ImmuneGO, by dose",
    caption = "Vaccine",
    fill = "Vaccines"
  ) + 
  theme_minimal() +
  facet_grid(vars(Type), vars(dose), scales = "free_x") +
  theme(plot.title = element_text(size = 20L, face = "bold", hjust = 0.5),
        strip.text.y = element_text(angle = 0, vjust = 1, hjust = 1, size = 5),
        strip.text.x = element_text(size = 10))

#Salvar
ggsave(NES_plot_unified_dayx, file = paste0(filename.unified, "_ssGSEA_NES_qvalue010_2.png"), width = 10, height = 10)

print(NES_plot_unified_dayx)

```



```{r}
#All conditions divided by vaccine, time on x-axis

# Criar o gráfico
NES_plot_unified_dayx_vax <- ggplot(data) +
  aes(x = condition, y = nes, fill = dose) +
  geom_boxplot(outlier.alpha = 0.5) +
  geom_point(data = subset(data, condition %in% filtered_data),
             aes(x = condition, y = nes, color = "black"),
             position = position_jitter(width = 0.2), alpha = 1) +
  scale_fill_manual(values = dose_colors) +
  scale_color_manual(values = dose_colors) +
  labs(
    title = "ssGSEA - All studies",
    subtitle = "General ImmuneGO, by dose",
    caption = "Vaccine",
    fill = "Vaccines"
  ) +
  theme_minimal() +
  facet_grid(vars(process), vars(vaccine), scales = "free") +
  theme(
    plot.title = element_text(size = 20L, face = "bold", hjust = 0.5),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 7),
    strip.text.y = element_text(angle = 0, vjust = 1, hjust = 1, size = 7)  # Rotação do texto da facet row
  )

#Salvar
ggsave(NES_plot_unified_dayx_vax, file = paste0(filename.unified, "_ssGSEA_NES_qvalue010_3.png"), width = 20, height = 20)

print(NES_plot_unified_dayx_vax)
```





###  Analise estatística

```{r}
install.packages("ggstatsplot")
library(ggstatsplot)

ssgsea_interesting = ssgsea_interesting %>% 
  filter(process == "INNATE IMMUNE SYSTEM") %>% 
  distinct()

process = "INNATE IMMUNE SYSTEM"


ggbetweenstats(
  data  = ssgsea_interesting,
  x     = condition,
  y     = nes,
  title =  paste0("ImmuneGO ssGSEA ", filename, process)
)

#Agrupado
grouped_ggbetweenstats(
  data             = ssgsea_interesting,
  x                = condition,
  y                = nes,
  grouping.var     = process,
  ggsignif.args    = list(textsize = 4, tip_length = 0.01),
  p.adjust.method  = "bonferroni",
  palette          = "default_jama",
  package          = "ggsci",
  plotgrid.args    = list(nrow = 1),
  annotation.args  = list(title = paste0("ImmuneGO ssGSEA ", filename))
)

```


### ANOVA

```{r}
esquisser(test)

library(ggsignif)
library(rstatix)

df = test
df$Condition <- as.factor(df$Condition)

stat.test <- df %>%
  t_test(Condition ~ NES) %>%
  add_significance()


test %>%
  filter(Process %in% "INNATE IMMUNE RESPONSE") %>%
  ggboxplot(x = "Process", y = "NES",
          color = "Condition", palette = "jco") +
  facet_wrap(vars(Vaccine)) +
  stat_compare_means(method = "anova", label.y = 4) +      # Add global p-value
  stat_compare_means(label = "p.signif", method = "t.test",
                     ref.group = ".all.")    # Comparação múltipla usando Tukey HSD


# Box plots with p-values
bxp <- ggboxplot(df, x = "Process", y = "NES", fill = "#00AFBB")
stat.test <- stat.test %>% add_xy_position(x = "supp")
bxp + 
  stat_pvalue_manual(stat.test, label = "p") +
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.1)))

```

### PCA

<https://www.datacamp.com/tutorial/pca-analysis-r> 
<https://rpkgs.datanovia.com/factoextra/index.html> <http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/114-mca-multiple-correspondence-analysis-in-r-essentials/>
<https://statisticsglobe.com/pca-before-k-means-clustering-r>>

Bibliotecas
```{r}
library(factoextra)
library(caret)
library(stats)
library(ggfortify)
library(ggplot2)
```

#### By GO term

```{r}

ssgsea_results_unified = ssgsea_results_unified %>% distinct() 
  
ssgsea_results_unified.innate = ssgsea_results_unified %>% 
  filter(process %in% c("INNATE IMMUNE SYSTEM", "ANTIVIRAL AND INTERFERON", "ARPP", "INFLAMMATION", "DENDRITIC CELLS", "MACROPHAGES"))

ssgsea_results_unified.adaptive = ssgsea_results_unified %>% 
  filter(!process %in% c("INNATE IMMUNE SYSTEM", "ANTIVIRAL AND INTERFERON", "ARPP", "INFLAMMATION", "DENDRITIC CELLS", "MACROPHAGES")) %>% 
  filter(!study == "GSE201533")
  

```


```{r}
data_ImmuneGO = ssgsea_results_unified.adaptive 
filename = "ssgsea_results_unified.adaptive"

pca_ann = data_ImmuneGO %>% 
  select(sample, condition, vaccine, geo:sex)

#Converter de long para wide
matrix_genes = data_ImmuneGO %>% 
  select(sample, process, nes)

matrix_genes <- dcast(matrix_genes, `sample` ~ `process`, 
                     value.var = "nes", 
                     fun.aggregate = mean)

matrix_genes <- replace(matrix_genes, matrix_genes == "NaN", 0) 

#Converter coluna 1 em rownames e excluir
matrix_data_pca = matrix_genes %>% 
  as.data.frame()

matrix_data_pca_ready = matrix_data_pca %>%  
  column_to_rownames("sample")

ann_vaccines_pca_matrix = matrix_data_pca %>% 
  merge(pca_ann, by.y = "sample", by.x = "sample", all.x = T, all.y = F) %>% 
  distinct()

ann_vaccines_pca_matrix_ready = ann_vaccines_pca_matrix %>% 
  select(sample, condition, vaccine, day, dose) %>% 
  mutate(dose = as.factor(dose),
         day = as.factor(day))

# Verifique quais colunas têm variância muito baixa
nearZeroVarCols <- nearZeroVar(matrix_data_pca_ready, saveMetrics = TRUE)
matrix_data_pca_ready <- matrix_data_pca_ready[, !nearZeroVarCols$nzv]
pca_res <- prcomp(matrix_data_pca_ready, scale. = TRUE)

# Crie o gráfico de PCA

###### Condition
pca_plot_condition = autoplot(pca_res, 
                    data = ann_vaccines_pca_matrix_ready, 
                    colour = 'condition', 
                    label = 0) + 
  labs(title=paste0(filename, "_bycondition")) + #scale fill manual
  scale_fill_continuous(type = "viridis") +
  theme_minimal()


###### Vaccine
pca_plot_vac = autoplot(pca_res, 
                    data = ann_vaccines_pca_matrix_ready, 
                    colour = 'vaccine', 
                    label = 0) + 
  labs(title=paste0(filename, "_byvaccine")) + #scale fill manual
  scale_color_manual(
    values = c(
  "BBIBP" = "#ffd000",
  "BNT" = "#56cfe1",
  "ChAd" = "#80ffdb",
  "ChAd-BNT" = "#0077b6",
  "ZF2001" = "#b5179e")) +
  theme_minimal()

###### Day
pca_plot_day = autoplot(pca_res, 
                    data = ann_vaccines_pca_matrix_ready, 
                    colour = 'day', 
                    label = 0) + 
  labs(title=paste0(filename, "_day")) + #scale fill manual
   scale_color_manual(
     values = c(
       "0" = "grey90",
    "1" = "#caf0f8",
    "3" = "#90e0ef",
    "6" = "#00b4d8",
    "7" = "#0077b6",
    "14" = "#023e8a",
    "28" = "#03045e")) +
  theme_minimal()

###### Dose

pca_plot_dose = autoplot(pca_res, 
                    data = ann_vaccines_pca_matrix_ready, 
                    colour = 'dose', 
                    label = 0) + 
  labs(title=paste0(filename, "_dose")) +
  scale_color_manual(
    values = c("0" = "#caf0f8", 
               "1" = "#56cfe1", 
               "2" = "#5978d4",
               "3" = "#b5179e"))+
  theme_minimal()

# Exiba o gráfico
print(pca_plot_condition)
print(pca_plot_day)
print(pca_plot_dose)
print(pca_plot_vac)

ggsave(pca_plot_condition, filename = paste0(filename, "CONDITION_PCA_Immune_GO_GSEA_not-ann.png"), width = 10, height = 8)
ggsave(pca_plot_vac, filename = paste0(filename, "_VACCINE_PCA_Immune_GO_GSEA_not-ann.png"), width = 10, height = 8)
ggsave(pca_plot_day, filename = paste0(filename, "_DAY_PCA_Immune_GO_GSEA_not-ann.png"), width = 10, height = 8)
ggsave(pca_plot_dose, filename = paste0(filename, "_DOSE_PCA_Immune_GO_GSEA_not-ann.png"), width = 10, height = 8)

```


```{r}
############### KNN classification

#Determinar o número de clusters para KNN
pca_scores <- data.frame(pca_res$x[, 1:2])
fviz_nbclust(pca_scores,  
                     FUNcluster = kmeans,
                     method = "wss")

set.seed(123)                             # Set seed for randomization
kmeans_clust <- kmeans(pca_scores,        # Perform k-means clustering
                        centers = 3) # Definir numero de clusters

#Visualizar clusters
ggp2 <- fviz_pca_ind(pca_res,
                   habillage = kmeans_clust$cluster,
                   repel = TRUE,
                   addEllipses = TRUE,
                   ellipse.type = "t",
                   label = "none",
                   labelsize = 0) +
  guides(color = guide_legend(override.aes = list(label = ""))) +
  ggtitle(paste0(filename,  "_KNN_Clustered.png")) +
  scale_color_brewer(palette="Set1")

print(ggp2)

ggsave(ggp2, file = paste0(filename, "_KNN_Clustered.png"), width = 5, height = 4)


# Clusterizar por grupo

#Vaccine
pca_group_vac <- fviz_pca_ind(pca_res,
                   habillage = ann_vaccines_pca_matrix_ready$vaccine,
                   repel = TRUE,
                   addEllipses = TRUE,
                   ellipse.type = "t",
                   label = "none",
                   labelsize = 0) +
  guides(color = guide_legend(override.aes = list(label = ""))) +
  ggtitle(paste0(filename,  "_Vac_KNN_Clustered.png")) +
  scale_color_brewer(palette="Set1")

#Dose
pca_group_dose <- fviz_pca_ind(pca_res,
                   habillage = ann_vaccines_pca_matrix_ready$dose,
                   repel = TRUE,
                   addEllipses = TRUE,
                   ellipse.type = "t",
                   label = "none",
                   labelsize = 0) +
  guides(color = guide_legend(override.aes = list(label = ""))) +
  ggtitle(paste0(filename,  "_Dose_KNN_Clustered.png")) +
  scale_color_brewer(palette="Set1")

#Day
pca_group_day <- fviz_pca_ind(pca_res,
                   habillage = ann_vaccines_pca_matrix_ready$day,
                   repel = TRUE,
                   addEllipses = TRUE,
                   ellipse.type = "t",
                   label = "none",
                   labelsize = 0) +
  guides(color = guide_legend(override.aes = list(label = ""))) +
  ggtitle(paste0(filename,  "_Day_KNN_Clustered.png")) +
  scale_color_brewer(palette="Set1")


#Salvar
ggsave(pca_group_day, file = paste0(filename, "_Day_Clustered.png"), width = 5, height = 4)
ggsave(pca_group_dose, file = paste0(filename, "_Dose_Clustered.png"), width = 5, height = 4)
ggsave(pca_group_vac, file = paste0(filename, "_Vac_Clustered.png"), width = 5, height = 4)
```


```{r}
#Biplot
biplot = fviz_pca_biplot(pca_res,              # Visualize clusters in biplot
                      col.var = "black",
                      alpha.var = 0.5,
                      habillage = kmeans_clust$cluster,
                      repel = TRUE,
                      addEllipses = TRUE,
                      ellipse.type = "convex",
                      labelsize = 3,
                      label = "var",
                      palette = "Set1")


ggsave(biplot, file = paste0(filename, "_BIPLOT_KNN_Clustered.png"), width = 10, height = 8)
```


```{r}
########Correlation plot
corr_matrix = cor(matrix_data_pca_ready) 

#Plot
corrplot = ggcorrplot(corr_matrix, hc.order = TRUE) + 
  theme(axis.text.x = element_text(angle = 90, size = 5), 
        axis.text.y = element_text(size = 5))
#Salvar
ggsave(corrplot, file = paste0(filename, "_corrplot.png"), 
       width = 4, #Grande 20, pequeno 10
       height= 4) #Grande 20, pequeno 10
print(corrplot)
data.pca <- princomp(corr_matrix) #PCA
summary(data.pca) #Retornar PCs


#########Scree plot
data.pca = princomp(corr_matrix) #PCA
summary(data.pca) #Retornar PCs

#########Scree plot
scree_plot = fviz_eig(data.pca, 
                      addlabels = TRUE,
                      ylim = c(0, 70)) +
  geom_col(color = "#00AFBB", fill = "#00AFBB") +
  theme_classic()

#Salvar
ggsave(scree_plot, file = paste0(filename, "_GSEA_screeplot.png"), width = 10, height = 3) 
print(scree_plot)


#Scree plot
loadings = data.frame(data.pca$loadings[, 1:3])
loadings$Genes = rownames(loadings)
loadings_1 = arrange(loadings, desc(Comp.1))
loadings_2 = arrange(loadings, desc(Comp.2))
loadings_3 = arrange(loadings, desc(Comp.3))

#Plot
loadings_1_plot = ggplot(loadings_1, aes(x = reorder(Genes, -Comp.1), y=Comp.1, fill = Comp.1)) + 
  ggtitle(paste0("Comp1-Comp2 Genes", filename)) + 
  ylab("Comp1") +
  xlab("Gene sets") +
  geom_bar(stat = "identity") + 
  theme_light() +
  theme(axis.text.x = element_text(vjust = 1, hjust = 1, angle = 45, size = 8), 
        axis.text.y = element_text(size = 5),
        plot.title = element_text(size = 15L, hjust = 0.5)) + 
  scale_fill_continuous(type = "viridis") #cores

print(loadings_1_plot)


loadings_2_plot = ggplot(loadings_2, aes(x = reorder(Genes, -Comp.2), y=Comp.2, fill = Comp.2)) + 
  ggtitle(paste0("Comp2-Comp3 Genes_", filename)) + 
  ylab("Comp2") +
  xlab("Gene sets") +
  geom_bar(stat = "identity") + 
  theme_light() +
  theme(axis.text.x = element_text(vjust = 1, hjust = 1, angle = 45, size = 8), 
        axis.text.y = element_text(size = 5),
        plot.title = element_text(size = 15L, hjust = 0.5)) + 
  scale_fill_continuous(type = "viridis") #cores

print(loadings_2_plot)


#Salvar
ggsave(loadings_1_plot, 
       file = paste0(filename, "_GSEA_genescomp1-2.png"), 
       width = 10, height = 5)

#Salvar
ggsave(loadings_2_plot, 
       file = paste0(filename, "_GSEA_genescomp2-3.png"), 
       width = 10, height = 5)


# Graph of the variables
fviz_pca_var_genes = fviz_pca_var(data.pca, 
                                  col.ind = "cos2",
                                  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"))
fviz_pca_var_genes

ggsave(fviz_pca_var_genes, file = paste0(filename, "_fviz_pca_var_GSEA.png"), width = 10)

cos2.1 = fviz_cos2(data.pca, choice = "var", axes = 1:2)
cos2.2 = fviz_cos2(data.pca, choice = "var", axes = 2:3)

ggsave(cos2.1, file = paste0(filename, "_cos2_GSEA_1.png"), width = 10)
ggsave(cos2.2, file = paste0(filename, "_cos2_GSEA_2.png"), width = 10)

```


#### By genes


Processar dados
```{r}
# Immune_GO_general_innate
# Immune_GO_adaptive

#INPUT
data_genes = ssgsea_results_unified
filename = "ssgsea_results_unified"
```

PCA

```{r}
#Converter de long para wide
matrix_genes = data_genes %>% 
  select(study, genes, l2fc) %>%  
  dcast(`study` ~ `genes`, 
        value.var = "l2fc", 
        fun.aggregate = mean) %>% 
  as.data.frame() %>% 
  column_to_rownames(var = "study") %>% 
  replace(is.na(.), 0)

matrix_data_pca = matrix_genes %>% 
  rownames_to_column(var = "Condition")

matrix_data_pca_ready = matrix_data_pca %>% 
  column_to_rownames(var = "Condition")

ann_vaccines_pca_matrix = ann_vaccines_pca %>% 
  merge(matrix_data_pca, 
        by.x = "Condition", 
        all.x = F, 
        all.y = F) %>% 
  select(Condition:Efficacy)

# Verifique quais colunas têm variância muito baixa
nearZeroVarCols <- nearZeroVar(matrix_data_pca_ready, saveMetrics = TRUE)
matrix_data_pca_ready <- matrix_data_pca_ready[, !nearZeroVarCols$nzv]
pca_res <- prcomp(matrix_data_pca_ready, scale. = TRUE)

# Crie o gráfico de PCA
pca_plot_ann = autoplot(pca_res, 
                    data = ann_vaccines_pca_matrix, 
                    colour = 'Vaccine', 
                    label = 1) + #1: display, 0: hide
  labs(title=filename) +
  theme_classic()
  # scale_color_manual(values = c(BBIBP = "#black", 
  #                               ZF2001 = "black",
  #                               BNT = "#56cfe1",
  #                               ChAd = "#80ffdb" ,
  #                               "ChAd-BNT" = "#72efdd")) 

pca_plot_not_ann = autoplot(pca_res, 
                    data = ann_vaccines_pca_matrix, 
                    colour = 'Vaccine', 
                    label = 0) + #1: display, 0: hide
  labs(title=filename) +
  theme_classic() +
  stat_ellipse(type = "t")

#Salvar
ggsave(pca_plot_not_ann, filename = paste0(filename, "_PCA_not-ann.png"), width = 10, height = 8)
ggsave(pca_plot_ann, filename = paste0(filename, "_PCA_ann.png"), width = 10, height = 8)

# Exiba o gráfico
print(pca_plot_ann)
print(pca_plot_not_ann)

############### KNN classification

#Determinar o número de clusters para KNN
pca_scores <- data.frame(pca_res$x[, 1:2])
ggp1 <- fviz_nbclust(pca_scores,  
                     FUNcluster = kmeans,
                     method = "wss")

ggp1 

set.seed(123)                             # Set seed for randomization
kmeans_clust <- kmeans(pca_scores,        # Perform k-means clustering
                        centers = 4) # Definir numero de clusters

#Visualizar clusters
ggp2 <- fviz_pca_ind(pca_res,
                   habillage = kmeans_clust$cluster,
                   repel = TRUE,
                   addEllipses = TRUE,
                   ellipse.type = "convex",
                   labelsize = 2) +
     guides(color = guide_legend(override.aes = list(label = ""))) +
  ggtitle(paste0(filename,  "KNN_Clustered.png"))

print(ggp2)
ggsave(ggp2, file = paste0(filename, "KNN_Clustered.png"), width = 5, height = 4)


```


```{r}
########Correlation plot
#Matriz
corr_matrix = cor(matrix_data_pca_ready) 

#Plot
corrplot = ggcorrplot(corr_matrix, hc.order = TRUE) + 
  theme(axis.text.x = element_text(angle = 90, size = 5), 
        axis.text.y = element_text(size = 5))
#Salvar
ggsave(corrplot, file = paste0(filename, "_corrplot.png"), 
       width = 20, #Grande 20, pequeno 10
       height= 20) #Grande 20, pequeno 10
print(corrplot)

#########Scree plot
data.pca = princomp(corr_matrix) #PCA
summary(data.pca) #Retornar PCs

#########Scree plot
scree_plot = fviz_eig(data.pca, 
                      addlabels = TRUE,
                      ylim = c(0, 70)) +
  geom_col(color = "#00AFBB", fill = "#00AFBB") +
  theme_classic()

#Salvar
ggsave(scree_plot, file = paste0(filename, "_screeplot.png"), width = 10, height = 3) 
print(scree_plot)

#Comp1-Comp2
loadings = data.frame(data.pca$loadings[, 1:3])
loadings$Genes = rownames(loadings)
loadings_1 = arrange(loadings, desc(Comp.1))
loadings_2 = arrange(loadings, desc(Comp.2))
loadings_3 = arrange(loadings, desc(Comp.3))

#Plot
loadings_1_plot = ggplot(loadings_1, aes(x = reorder(Genes, -Comp.1), y=Comp.1, fill = Comp.1)) + 
  ggtitle(paste0("Comp1-Comp2 Genes", filename)) + 
  ylab("Comp1") +
  xlab("Genes") +
  geom_bar(stat = "identity") + 
  theme_light() +
  theme(axis.text.x = element_text(vjust = 0.1, hjust = 1, angle = 90, size = 3), 
        axis.text.y = element_text(size = 5),
        plot.title = element_text(size = 15L, hjust = 0.5)) + 
  scale_fill_continuous(type = "viridis") #cores
  
#Salvar
ggsave(loadings_1_plot, 
       file = paste0(filename, "_genescomp1-2.png"), 
       width = 10, height = 5)

print(loadings_1_plot)
```


```{r}
# Graph of the variables
fviz_pca_var_genes = fviz_pca_var(data.pca, 
                                  col.ind = "cos2",
                                  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"))

#Salvar
ggsave(fviz_pca_var_genes, file = paste0(filename, "_fviz_pca_var_genes.png"), width = 10)

######### COS2
cos2 = fviz_cos2(data.pca, 
                 choice = "var", 
                 axes = 1:2) + 
  theme(axis.text.x = element_text(angle=45, 
                                   size = 3)) +
  theme_light()

#Salvar
ggsave(cos2, file = paste0(filename, "_cos2.png"), width = 10, height = 5)

#Colorido
fviz_pca_var(data.pca, col.var = "cos2",
            gradient.cols = c("black", "orange", "green"),
            repel = TRUE)

#Print
print(fviz_pca_var_genes)
print(cos2)
```


## CellMarker

```{r}

```








#GSEA

## Manual
```{r}
######### INPUT AQUI ######### 
# all_degs_p_05_vac_infected = all_degs_p_05_vac_infected_27_11_23 %>%
#   mutate(condition = case_when(condition == "H-BNT-I (D1)" ~ "BNT-I (D1)",
#                                condition == "H-BNT-I (D3)" ~ "BNT-I (D3)",
#                                condition == "H-BNT-I (D4)" ~ "BNT-I (D4)",
#                                condition == "H-I-I (D2)" ~ "I-I (D2)",
#                                condition == "I-BNT-I (D2)" ~ "I-BNT-I (D2)",
#                                condition == "I-BNT-I (D5)" ~ "I-BNT-I (D5)",
#                                condition == "H-BNT-I (D10, mild)" ~ "BNT-I (D10, mild)",
#                                condition == "H-BNT-I (D10, severe)" ~ "BNT-I (D10, severe)",
#                                condition == "H-BNT-I (D26, mild)" ~ "BNT-I (D26, mild)",
#                                condition == "H-BNT-I (D26, severe)" ~ "BNT-I (D26, severe)",
#                                condition == "H-I (D10, moderate)" ~ "I (D10, moderate)",
#                                condition == "H-I (D10, severe)" ~ "I (D10, severe)",
#                                condition == "H-I (D26, moderate)" ~ "I (D26, moderate)",
#                                condition == "H-I (D26, severe)" ~ "I (D26, severe)",
#                                condition == "H-I (D51, moderate)" ~ "I (D51, moderate)",
#                                condition == "H-I (D51, severe)" ~ "I (D51, severe)",
#                                TRUE ~ condition))
# 
# write.csv(all_degs_p_05_vac_infected_27_11_23, file ="all_degs_p_05_vac_infected_29_11_23.csv")

all_degs_p_05_vac_infected

#GSE199750
# degs <- filter(all_degs_p_05_vac_infected, condition == "BNT (V1, D6)") #Não enriquecido
# degs <- filter(all_degs_p_05_vac_infected, condition == "BNT (V2, D1)") #Enriquecido
# degs <- filter(all_degs_p_05_vac_infected, condition == "BNT (V3, D1)") #Enriquecido
# degs <- filter(all_degs_p_05_vac_infected, condition == "BNT-MO (V3, D1)") #Enriquecido
# degs <- filter(all_degs_p_05_vac_infected, condition == "ChAd (V1, D6)") #Enriquecido
# degs <- filter(all_degs_p_05_vac_infected, condition == "ChAd (V2, D1)") #Enriquecido
# degs <- filter(all_degs_p_05_vac_infected, condition == "ChAd-BNT (V3, D1)") #Não enriquecido

#GSE201533
# degs = filter(all_degs_p_05_vac_infected, condition == "ChAd (V1, D3)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ChAd (V1, D7)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ChAd (V2, D3)") #Não enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ChAd (V2, D7)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ChAd-BNT (V2, D3)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ChAd-BNT (V2, D7)") #Enriquecido

#GSE206023
# degs = filter(all_degs_p_05_vac_infected, condition == "BBIBP (V3, D07)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "BBIBP (V3, D14)") #Não enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "BBIBP (V3, D28)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ZF2001 (V3, D07)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ZF2001 (V3, D14)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ZF2001 (V3, D28)") #Enriquecido

#GSE201530
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D1)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D3)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D4)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I-I (D2)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I-BNT-I (D2)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I-BNT-I (D5)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I-I (D1)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I-I (D3)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I-I (D5)")


#GSE189039
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I-BNT (D51, mild)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I-BNT (D51, severe)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D10, mild)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D10, severe)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D26, mild)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D26, severe)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I (D10, moderate)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I (D10, severe)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I (D26, moderate)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I (D26, severe)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I (D51, moderate)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I (D51, severe)")

```

```{r}
#Definir nome para arquivos gerados
file_name = degs %>% 
  select(condition) %>% 
  distinct() %>% 
  as.character()

######### DEGs para ORA ######### 
gene_list_ora <- degs$genes
######### DEGs para GSEA ######### 
degs_gene_list_lf2c <- degs$log2fold_change
names(degs_gene_list_lf2c) <- degs$genes
degs_gene_list_lf2c<-na.omit(degs_gene_list_lf2c)
degs_gene_list_lf2c = sort(degs_gene_list_lf2c, decreasing = TRUE)

############ IMMUNE GO
Immune_GO_T2G = ImmuneGO_Annotated_Genes_Nested_Interesting %>% 
  select(gene_set_short, genes)

immune_GO_gsea <- GSEA(degs_gene_list_lf2c, 
                       TERM2GENE = Immune_GO_T2G, 
                       minGSSize = 2,
                       maxGSSize = 1000,
                       pvalueCutoff = 0.25,
                       pAdjustMethod = "BH") %>% 
  as.data.frame() %>% 
  arrange(qvalue) %>% 
  mutate(condition = file_name,
         gsea_enrichment = "ImmuneGO")

#Salvar tabela
write.csv(immune_GO_gsea, file = paste(file_name, "ImmuneGO_GSEA.csv", sep = "_"))

########### CELL MARKER

CellMarker_genes = CellMarker_ImmuneCells %>% 
  select(cell_name, marker)

cell_types_gsea_immune <- GSEA(degs_gene_list_lf2c, 
                       TERM2GENE = CellMarker_genes, 
                       minGSSize = 1,
                       maxGSSize = 1000,
                       pvalueCutoff = 0.25, 
                       pAdjustMethod = "BH") %>% 
  as.data.frame() %>% 
  arrange(qvalue) %>% 
  mutate(condition = file_name,
         gsea_enrichment = "CellMarker")

#Salvar tabela
write.csv(cell_types_gsea_immune, file=paste(file_name, "CellMarker_GSEA.csv", sep = "_"))

########### VAX SIG DB

vax_sig_genes = VAX_Genes_Annotated_RAW %>% 
  select(STANDARD_NAME, GENE)

ora_degs_vax <- enricher(gene_list_ora, 
                       TERM2GENE = vax_sig_genes, 
                       minGSSize = 1,
                       maxGSSize = 1000,
                       pvalueCutoff = 0.25, 
                       pAdjustMethod = "BH")%>% 
  as.data.frame() %>% 
  arrange(qvalue) %>% 
  mutate(condition = file_name,
         gsea_enrichment = "VAXSig")

#Salvar 
write.csv(ora_degs_vax, file=paste(file_name, "VAXSig_ORA.csv", sep = "_"))


```


## Automatizado
```{r}
# Função para realizar as análises para cada valor da coluna "condition"
GSEA_ALL <- function(df, Immune_GO_T2G, CellMarker_genes, vax_sig_genes) {
  resultados <- list()

  condicoes <- df$condition %>% unique() %>% as.character()

  for (condicao in condicoes) {
    # Selecionar DEGs para a condição atual
    genes_condicao <- df$genes[df$condition == condicao]
    logfoldchange_condicao <- df$log2fold_change[df$condition == condicao]
    names(logfoldchange_condicao) <- genes_condicao
    logfoldchange_condicao <- na.omit(logfoldchange_condicao)
    logfoldchange_condicao <- sort(logfoldchange_condicao, decreasing = TRUE)

        ############ IMMUNE GO
    immune_GO_gsea <- tryCatch({
      GSEA(logfoldchange_condicao,
           TERM2GENE = Immune_GO_T2G,
           minGSSize = 2,
           maxGSSize = 1000,
           pvalueCutoff = 0.25,
           pAdjustMethod = "BH") %>%
        as.data.frame() %>%
        arrange(qvalue) %>%
        mutate(condition = condicao,
               gsea_enrichment = "ImmuneGO")
    }, error = function(e) {
      return(NULL)  # Retorna NULL caso ocorra o erro
    })

    if (!is.null(immune_GO_gsea)) {
      resultados[[paste(condicao, "ImmuneGO", sep = "_")]] <- immune_GO_gsea
    }

    ############ CELL MARKER
    cell_marker_gsea <- tryCatch({
      GSEA(logfoldchange_condicao,
           TERM2GENE = CellMarker_genes,
           minGSSize = 1,
           maxGSSize = 1000,
           pvalueCutoff = 0.25,
           pAdjustMethod = "BH") %>%
        as.data.frame() %>%
        arrange(qvalue) %>%
        mutate(condition = condicao,
               gsea_enrichment = "CellMarker")
    }, error = function(e) {
      return(NULL)  # Retorna NULL caso ocorra o erro
    })

    if (!is.null(cell_marker_gsea)) {
      resultados[[paste(condicao, "CellMarker", sep = "_")]] <- cell_marker_gsea
    }

    ############ VAX SIG DB
    vax_sig_gsea <- tryCatch({
      enricher(genes_condicao,
               TERM2GENE = vax_sig_genes,
               minGSSize = 1,
               maxGSSize = 1000,
               pvalueCutoff = 0.25,
               pAdjustMethod = "BH") %>%
        as.data.frame() %>%
        arrange(qvalue) %>%
        mutate(condition = condicao,
               gsea_enrichment = "VAXSig")
    }, error = function(e) {
      return(NULL)  # Retorna NULL caso ocorra o erro
    })

    if (!is.null(vax_sig_gsea)) {
      resultados[[paste(condicao, "VAXSig", sep = "_")]] <- vax_sig_gsea
    }
  }

  # Combinar todos os resultados em um único dataframe
  resultado_final <- bind_rows(resultados)

  return(resultado_final)
}


# Chamar a função para executar as análises para cada valor da coluna "condition"
all_degs_p_05_vac_infected_GSEA_ALL <- GSEA_ALL(all_degs_p_05_vac_infected, Immune_GO_T2G, CellMarker_genes, vax_sig_genes)

all_degs_p_05_vac_infected_GSEA_ALL = all_degs_p_05_vac_infected_GSEA_ALL %>% 
  rownames_to_column("delete") %>% 
  select(-delete, -Description)

write.csv(all_degs_p_05_vac_infected_GSEA_ALL, file = "all_degs_p_05_vac_infected_GSEA_ALL_29-11-23.csv")

# Salvar os resultados em arquivos separados por condição e tipo de análise
for (i in 1:nrow(all_degs_p_05_vac_infected_GSEA_ALL)) {
  write.csv(all_degs_p_05_vac_infected_GSEA_ALL[i, ], file = paste(all_degs_p_05_vac_infected_GSEA_ALL[i, "condition"], 
                                                 all_degs_p_05_vac_infected_GSEA_ALL[i, "gsea_enrichment"], 
                                                 ".csv", sep = "_"))
}

```

